LCOV - 0e5ff9b9b45a657aea12440478dc002e9b01f138 vs 0123ce131fca454009439dfa3b2266d1d40737d7

LCOV - differential code coverage report

Current view:	top level - src/backend/utils/adt - regexp.c (source / functions)		Coverage	Total	Hit	LBC	UBC	GBC	GNC	CBC	EUB	ECB	DCB
Current:	0e5ff9b9b45a657aea12440478dc002e9b01f138 vs 0123ce131fca454009439dfa3b2266d1d40737d7	Lines:	92.0 %	660	607		53		10	597			10
Current Date:	2026-03-14 14:10:32 -0400	Functions:	98.0 %	51	50		1		5	45
Baseline:	lcov-20260315-024220-baseline	Branches:	80.0 %	375	300	1	74	1		299	104	50
Baseline Date:	2026-03-14 15:27:56 +0100	Line coverage date bins:
Legend:	Lines: hit not hit Branches: + taken - not taken # not executed	(30,360] days:	100.0 %	28	28				10	18
		(360..) days:	91.6 %	632	579		53			579
		Function coverage date bins:
		(360..) days:	98.0 %	51	50		1		5	45
		Branch coverage date bins:
		(30,360] days:	54.2 %	24	13		1			13	8	2
		(360..) days:	56.8 %	505	287	1	73	1		286	96	48

 Age         Owner                    Branch data    TLA  Line data    Source code

                                  1                 :                : /*-------------------------------------------------------------------------
                                  2                 :                :  *
                                  3                 :                :  * regexp.c
                                  4                 :                :  *    Postgres' interface to the regular expression package.
                                  5                 :                :  *
                                  6                 :                :  * Portions Copyright (c) 1996-2026, PostgreSQL Global Development Group
                                  7                 :                :  * Portions Copyright (c) 1994, Regents of the University of California
                                  8                 :                :  *
                                  9                 :                :  *
                                 10                 :                :  * IDENTIFICATION
                                 11                 :                :  *    src/backend/utils/adt/regexp.c
                                 12                 :                :  *
                                 13                 :                :  *      Alistair Crooks added the code for the regex caching
                                 14                 :                :  *      agc - cached the regular expressions used - there's a good chance
                                 15                 :                :  *      that we'll get a hit, so this saves a compile step for every
                                 16                 :                :  *      attempted match. I haven't actually measured the speed improvement,
                                 17                 :                :  *      but it `looks' a lot quicker visually when watching regression
                                 18                 :                :  *      test output.
                                 19                 :                :  *
                                 20                 :                :  *      agc - incorporated Keith Bostic's Berkeley regex code into
                                 21                 :                :  *      the tree for all ports. To distinguish this regex code from any that
                                 22                 :                :  *      is existent on a platform, I've prepended the string "pg_" to
                                 23                 :                :  *      the functions regcomp, regerror, regexec and regfree.
                                 24                 :                :  *      Fixed a bug that was originally a typo by me, where `i' was used
                                 25                 :                :  *      instead of `oldest' when compiling regular expressions - benign
                                 26                 :                :  *      results mostly, although occasionally it bit you...
                                 27                 :                :  *
                                 28                 :                :  *-------------------------------------------------------------------------
                                 29                 :                :  */
                                 30                 :                : #include "postgres.h"
                                 31                 :                : 
                                 32                 :                : #include "catalog/pg_type.h"
                                 33                 :                : #include "funcapi.h"
                                 34                 :                : #include "regex/regex.h"
                                 35                 :                : #include "utils/array.h"
                                 36                 :                : #include "utils/builtins.h"
                                 37                 :                : #include "utils/memutils.h"
                                 38                 :                : #include "utils/varlena.h"
                                 39                 :                : 
                                 40                 :                : #define PG_GETARG_TEXT_PP_IF_EXISTS(_n) \
                                 41                 :                :     (PG_NARGS() > (_n) ? PG_GETARG_TEXT_PP(_n) : NULL)
                                 42                 :                : 
                                 43                 :                : 
                                 44                 :                : /* all the options of interest for regex functions */
                                 45                 :                : typedef struct pg_re_flags
                                 46                 :                : {
                                 47                 :                :     int         cflags;         /* compile flags for Spencer's regex code */
                                 48                 :                :     bool        glob;           /* do it globally (for each occurrence) */
                                 49                 :                : } pg_re_flags;
                                 50                 :                : 
                                 51                 :                : /* cross-call state for regexp_match and regexp_split functions */
                                 52                 :                : typedef struct regexp_matches_ctx
                                 53                 :                : {
                                 54                 :                :     text       *orig_str;       /* data string in original TEXT form */
                                 55                 :                :     int         nmatches;       /* number of places where pattern matched */
                                 56                 :                :     int         npatterns;      /* number of capturing subpatterns */
                                 57                 :                :     /* We store start char index and end+1 char index for each match */
                                 58                 :                :     /* so the number of entries in match_locs is nmatches * npatterns * 2 */
                                 59                 :                :     int        *match_locs;     /* 0-based character indexes */
                                 60                 :                :     int         next_match;     /* 0-based index of next match to process */
                                 61                 :                :     /* workspace for build_regexp_match_result() */
                                 62                 :                :     Datum      *elems;          /* has npatterns elements */
                                 63                 :                :     bool       *nulls;          /* has npatterns elements */
                                 64                 :                :     pg_wchar   *wide_str;       /* wide-char version of original string */
                                 65                 :                :     char       *conv_buf;       /* conversion buffer, if needed */
                                 66                 :                :     int         conv_bufsiz;    /* size thereof */
                                 67                 :                : } regexp_matches_ctx;
                                 68                 :                : 
                                 69                 :                : /*
                                 70                 :                :  * We cache precompiled regular expressions using a "self organizing list"
                                 71                 :                :  * structure, in which recently-used items tend to be near the front.
                                 72                 :                :  * Whenever we use an entry, it's moved up to the front of the list.
                                 73                 :                :  * Over time, an item's average position corresponds to its frequency of use.
                                 74                 :                :  *
                                 75                 :                :  * When we first create an entry, it's inserted at the front of
                                 76                 :                :  * the array, dropping the entry at the end of the array if necessary to
                                 77                 :                :  * make room.  (This might seem to be weighting the new entry too heavily,
                                 78                 :                :  * but if we insert new entries further back, we'll be unable to adjust to
                                 79                 :                :  * a sudden shift in the query mix where we are presented with MAX_CACHED_RES
                                 80                 :                :  * never-before-seen items used circularly.  We ought to be able to handle
                                 81                 :                :  * that case, so we have to insert at the front.)
                                 82                 :                :  *
                                 83                 :                :  * Knuth mentions a variant strategy in which a used item is moved up just
                                 84                 :                :  * one place in the list.  Although he says this uses fewer comparisons on
                                 85                 :                :  * average, it seems not to adapt very well to the situation where you have
                                 86                 :                :  * both some reusable patterns and a steady stream of non-reusable patterns.
                                 87                 :                :  * A reusable pattern that isn't used at least as often as non-reusable
                                 88                 :                :  * patterns are seen will "fail to keep up" and will drop off the end of the
                                 89                 :                :  * cache.  With move-to-front, a reusable pattern is guaranteed to stay in
                                 90                 :                :  * the cache as long as it's used at least once in every MAX_CACHED_RES uses.
                                 91                 :                :  */
                                 92                 :                : 
                                 93                 :                : /* this is the maximum number of cached regular expressions */
                                 94                 :                : #ifndef MAX_CACHED_RES
                                 95                 :                : #define MAX_CACHED_RES  32
                                 96                 :                : #endif
                                 97                 :                : 
                                 98                 :                : /* A parent memory context for regular expressions. */
                                 99                 :                : static MemoryContext RegexpCacheMemoryContext;
                                100                 :                : 
                                101                 :                : /* this structure describes one cached regular expression */
                                102                 :                : typedef struct cached_re_str
                                103                 :                : {
                                104                 :                :     MemoryContext cre_context;  /* memory context for this regexp */
                                105                 :                :     char       *cre_pat;        /* original RE (not null terminated!) */
                                106                 :                :     int         cre_pat_len;    /* length of original RE, in bytes */
                                107                 :                :     int         cre_flags;      /* compile flags: extended,icase etc */
                                108                 :                :     Oid         cre_collation;  /* collation to use */
                                109                 :                :     regex_t     cre_re;         /* the compiled regular expression */
                                110                 :                : } cached_re_str;
                                111                 :                : 
                                112                 :                : static int  num_res = 0;        /* # of cached re's */
                                113                 :                : static cached_re_str re_array[MAX_CACHED_RES];  /* cached re's */
                                114                 :                : 
                                115                 :                : 
                                116                 :                : /* Local functions */
                                117                 :                : static regexp_matches_ctx *setup_regexp_matches(text *orig_str, text *pattern,
                                118                 :                :                                                 pg_re_flags *re_flags,
                                119                 :                :                                                 int start_search,
                                120                 :                :                                                 Oid collation,
                                121                 :                :                                                 bool use_subpatterns,
                                122                 :                :                                                 bool ignore_degenerate,
                                123                 :                :                                                 bool fetching_unmatched);
                                124                 :                : static ArrayType *build_regexp_match_result(regexp_matches_ctx *matchctx);
                                125                 :                : static Datum build_regexp_split_result(regexp_matches_ctx *splitctx);
                                126                 :                : 
                                127                 :                : 
                                128                 :                : /*
                                129                 :                :  * RE_compile_and_cache - compile a RE, caching if possible
                                130                 :                :  *
                                131                 :                :  * Returns regex_t *
                                132                 :                :  *
                                133                 :                :  *  text_re --- the pattern, expressed as a TEXT object
                                134                 :                :  *  cflags --- compile options for the pattern
                                135                 :                :  *  collation --- collation to use for LC_CTYPE-dependent behavior
                                136                 :                :  *
                                137                 :                :  * Pattern is given in the database encoding.  We internally convert to
                                138                 :                :  * an array of pg_wchar, which is what Spencer's regex package wants.
                                139                 :                :  */
                                140                 :                : regex_t *
 5453 tgl@sss.pgh.pa.us         141                 :CBC     3739756 : RE_compile_and_cache(text *text_re, int cflags, Oid collation)
                                142                 :                : {
 6750                           143   [ -  +  -  -  :        3739756 :     int         text_re_len = VARSIZE_ANY_EXHDR(text_re);
                                     -  -  -  -  +  
                                                 + ]
                                144         [ +  + ]:        3739756 :     char       *text_re_val = VARDATA_ANY(text_re);
                                145                 :                :     pg_wchar   *pattern;
                                146                 :                :     int         pattern_len;
                                147                 :                :     int         i;
                                148                 :                :     int         regcomp_result;
                                149                 :                :     cached_re_str re_temp;
                                150                 :                :     char        errMsg[100];
                                151                 :                :     MemoryContext oldcontext;
                                152                 :                : 
                                153                 :                :     /*
                                154                 :                :      * Look for a match among previously compiled REs.  Since the data
                                155                 :                :      * structure is self-organizing with most-used entries at the front, our
                                156                 :                :      * search strategy can just be to scan from the front.
                                157                 :                :      */
 8439                           158         [ +  + ]:        4051865 :     for (i = 0; i < num_res; i++)
                                159                 :                :     {
 6750                           160         [ +  + ]:        4048577 :         if (re_array[i].cre_pat_len == text_re_len &&
                                161         [ +  + ]:        3744081 :             re_array[i].cre_flags == cflags &&
 5453                           162         [ +  + ]:        3743473 :             re_array[i].cre_collation == collation &&
 6750                           163         [ +  + ]:        3743285 :             memcmp(re_array[i].cre_pat, text_re_val, text_re_len) == 0)
                                164                 :                :         {
                                165                 :                :             /*
                                166                 :                :              * Found a match; move it to front if not there already.
                                167                 :                :              */
 8439                           168         [ +  + ]:        3736468 :             if (i > 0)
                                169                 :                :             {
                                170                 :         241711 :                 re_temp = re_array[i];
                                171                 :         241711 :                 memmove(&re_array[1], &re_array[0], i * sizeof(cached_re_str));
                                172                 :         241711 :                 re_array[0] = re_temp;
                                173                 :                :             }
                                174                 :                : 
 7453                           175                 :        3736468 :             return &re_array[0].cre_re;
                                176                 :                :         }
                                177                 :                :     }
                                178                 :                : 
                                179                 :                :     /* Set up the cache memory on first go through. */
 1072 tmunro@postgresql.or      180         [ +  + ]:           3288 :     if (unlikely(RegexpCacheMemoryContext == NULL))
                                181                 :            857 :         RegexpCacheMemoryContext =
                                182                 :            857 :             AllocSetContextCreate(TopMemoryContext,
                                183                 :                :                                   "RegexpCacheMemoryContext",
                                184                 :                :                                   ALLOCSET_SMALL_SIZES);
                                185                 :                : 
                                186                 :                :     /*
                                187                 :                :      * Couldn't find it, so try to compile the new RE.  To avoid leaking
                                188                 :                :      * resources on failure, we build into the re_temp local.
                                189                 :                :      */
                                190                 :                : 
                                191                 :                :     /* Convert pattern string to wide characters */
   95 michael@paquier.xyz       192                 :GNC        3288 :     pattern = palloc_array(pg_wchar, text_re_len + 1);
 6750 tgl@sss.pgh.pa.us         193                 :CBC        3288 :     pattern_len = pg_mb2wchar_with_len(text_re_val,
                                194                 :                :                                        pattern,
                                195                 :                :                                        text_re_len);
                                196                 :                : 
                                197                 :                :     /*
                                198                 :                :      * Make a memory context for this compiled regexp.  This is initially a
                                199                 :                :      * child of the current memory context, so it will be cleaned up
                                200                 :                :      * automatically if compilation is interrupted and throws an ERROR. We'll
                                201                 :                :      * re-parent it under the longer lived cache context if we make it to the
                                202                 :                :      * bottom of this function.
                                203                 :                :      */
 1072 tmunro@postgresql.or      204                 :           3288 :     re_temp.cre_context = AllocSetContextCreate(CurrentMemoryContext,
                                205                 :                :                                                 "RegexpMemoryContext",
                                206                 :                :                                                 ALLOCSET_SMALL_SIZES);
                                207                 :           3288 :     oldcontext = MemoryContextSwitchTo(re_temp.cre_context);
                                208                 :                : 
 8439 tgl@sss.pgh.pa.us         209                 :           3288 :     regcomp_result = pg_regcomp(&re_temp.cre_re,
                                210                 :                :                                 pattern,
                                211                 :                :                                 pattern_len,
                                212                 :                :                                 cflags,
                                213                 :                :                                 collation);
                                214                 :                : 
                                215                 :           3276 :     pfree(pattern);
                                216                 :                : 
 7781                           217         [ +  + ]:           3276 :     if (regcomp_result != REG_OKAY)
                                218                 :                :     {
                                219                 :                :         /* re didn't compile (no need for pg_regfree, if so) */
 8439                           220                 :             18 :         pg_regerror(regcomp_result, &re_temp.cre_re, errMsg, sizeof(errMsg));
 8267                           221         [ +  - ]:             18 :         ereport(ERROR,
                                222                 :                :                 (errcode(ERRCODE_INVALID_REGULAR_EXPRESSION),
                                223                 :                :                  errmsg("invalid regular expression: %s", errMsg)));
                                224                 :                :     }
                                225                 :                : 
                                226                 :                :     /* Copy the pattern into the per-regexp memory context. */
 1072 tmunro@postgresql.or      227                 :           3258 :     re_temp.cre_pat = palloc(text_re_len + 1);
                                228                 :           3258 :     memcpy(re_temp.cre_pat, text_re_val, text_re_len);
                                229                 :                : 
                                230                 :                :     /*
                                231                 :                :      * NUL-terminate it only for the benefit of the identifier used for the
                                232                 :                :      * memory context, visible in the pg_backend_memory_contexts view.
                                233                 :                :      */
                                234                 :           3258 :     re_temp.cre_pat[text_re_len] = 0;
                                235                 :           3258 :     MemoryContextSetIdentifier(re_temp.cre_context, re_temp.cre_pat);
                                236                 :                : 
 6750 tgl@sss.pgh.pa.us         237                 :           3258 :     re_temp.cre_pat_len = text_re_len;
 8439                           238                 :           3258 :     re_temp.cre_flags = cflags;
 5453                           239                 :           3258 :     re_temp.cre_collation = collation;
                                240                 :                : 
                                241                 :                :     /*
                                242                 :                :      * Okay, we have a valid new item in re_temp; insert it into the storage
                                243                 :                :      * array.  Discard last entry if needed.
                                244                 :                :      */
 8439                           245         [ +  + ]:           3258 :     if (num_res >= MAX_CACHED_RES)
                                246                 :                :     {
                                247                 :            462 :         --num_res;
                                248         [ -  + ]:            462 :         Assert(num_res < MAX_CACHED_RES);
                                249                 :                :         /* Delete the memory context holding the regexp and pattern. */
 1072 tmunro@postgresql.or      250                 :            462 :         MemoryContextDelete(re_array[num_res].cre_context);
                                251                 :                :     }
                                252                 :                : 
                                253                 :                :     /* Re-parent the memory context to our long-lived cache context. */
                                254                 :           3258 :     MemoryContextSetParent(re_temp.cre_context, RegexpCacheMemoryContext);
                                255                 :                : 
 8439 tgl@sss.pgh.pa.us         256         [ +  + ]:           3258 :     if (num_res > 0)
                                257                 :           2401 :         memmove(&re_array[1], &re_array[0], num_res * sizeof(cached_re_str));
                                258                 :                : 
                                259                 :           3258 :     re_array[0] = re_temp;
                                260                 :           3258 :     num_res++;
                                261                 :                : 
 1072 tmunro@postgresql.or      262                 :           3258 :     MemoryContextSwitchTo(oldcontext);
                                263                 :                : 
 7453 tgl@sss.pgh.pa.us         264                 :           3258 :     return &re_array[0].cre_re;
                                265                 :                : }
                                266                 :                : 
                                267                 :                : /*
                                268                 :                :  * RE_wchar_execute - execute a compiled RE against pg_wchar data
                                269                 :                :  *
                                270                 :                :  * Returns true on match, false on no match; any other result raises ERROR
                                271                 :                :  *
                                272                 :                :  *  re --- the compiled pattern as returned by RE_compile_and_cache
                                273                 :                :  *  data --- the pg_wchar data to match against (need not be null-terminated)
                                274                 :                :  *  data_len --- the length of the data, handed to pg_regexec unchanged
                                275                 :                :  *  start_search --- the offset in the data to start searching
                                276                 :                :  *  nmatch, pmatch --- optional return area for match details
                                277                 :                :  *
                                278                 :                :  * Data is given as an array of pg_wchar, which is what Spencer's regex
                                279                 :                :  * package wants.
                                280                 :                :  */
                                281                 :                : static bool
  6935 neilc@samurai.com         282                 :        4167804 : RE_wchar_execute(regex_t *re, pg_wchar *data, int data_len,
                                283                 :                :                  int start_search, int nmatch, regmatch_t *pmatch)
                                284                 :                : {
                                285                 :                :     int         regexec_result;
                                286                 :                :     char        errMsg[100];    /* filled in by pg_regerror on failure */
                                287                 :                : 
                                288                 :                :     /* Run the match; nmatch/pmatch are passed straight through */
  7453 tgl@sss.pgh.pa.us         289                 :        4167804 :     regexec_result = pg_regexec(re,
                                290                 :                :                                 data,
                                291                 :                :                                 data_len,
                                292                 :                :                                 start_search,
                                293                 :                :                                 NULL,   /* no details */
                                294                 :                :                                 nmatch,
                                295                 :                :                                 pmatch,
                                296                 :                :                                 0);
                                297                 :                : 
  7781                           298   [ +  +  -  + ]:        4167804 :     if (regexec_result != REG_OKAY && regexec_result != REG_NOMATCH)
                                299                 :                :     {
                                300                 :                :         /* neither match nor no-match: report it as a regex failure */
  7453 tgl@sss.pgh.pa.us         301                 :UBC           0 :         pg_regerror(regexec_result, re, errMsg, sizeof(errMsg));
  7781                           302         [ #  # ]:              0 :         ereport(ERROR,
                                303                 :                :                 (errcode(ERRCODE_INVALID_REGULAR_EXPRESSION),
                                304                 :                :                  errmsg("regular expression failed: %s", errMsg)));
                                305                 :                :     }
                                306                 :                : 
  7781 tgl@sss.pgh.pa.us         307                 :CBC     4167804 :     return (regexec_result == REG_OKAY);
                                308                 :                : }
                                309                 :                : 
                                310                 :                : /*
                                311                 :                :  * RE_execute - execute a compiled RE against database-encoded data
                                312                 :                :  *
                                313                 :                :  * Returns true on match, false on no match
                                314                 :                :  *
                                315                 :                :  *  re --- the compiled pattern as returned by RE_compile_and_cache
                                316                 :                :  *  dat --- the data to match against (need not be null-terminated)
                                317                 :                :  *  dat_len --- the length of the data string
                                318                 :                :  *  nmatch, pmatch --- optional return area for match details
                                319                 :                :  *
                                320                 :                :  * Data is given in the database encoding.  We convert it to a temporary
                                321                 :                :  * array of pg_wchar, which is what Spencer's regex package wants.
                                322                 :                :  */
                                323                 :                : static bool
  6935 neilc@samurai.com         324                 :        3619092 : RE_execute(regex_t *re, char *dat, int dat_len,
                                325                 :                :            int nmatch, regmatch_t *pmatch)
                                326                 :                : {
                                327                 :                :     pg_wchar   *data;
                                328                 :                :     int         data_len;
                                329                 :                :     bool        match;
                                330                 :                : 
                                331                 :                :     /* Convert data string to wide characters (array has dat_len + 1 slots) */
    95 michael@paquier.xyz       332                 :GNC     3619092 :     data = palloc_array(pg_wchar, dat_len + 1);
  6935 neilc@samurai.com         333                 :CBC     3619092 :     data_len = pg_mb2wchar_with_len(dat, data, dat_len);
                                334                 :                : 
                                335                 :                :     /* Match from offset 0, then free the temporary array before returning */
                                336                 :        3619092 :     match = RE_wchar_execute(re, data, data_len, 0, nmatch, pmatch);
                                337                 :                : 
                                338                 :        3619092 :     pfree(data);
                                339                 :        3619092 :     return match;
                                340                 :                : }
                                341                 :                : 
                                342                 :                : /*
                                343                 :                :  * RE_compile_and_execute - compile and execute a RE
                                344                 :                :  *
                                345                 :                :  * Returns true on match, false on no match
                                346                 :                :  *
                                347                 :                :  *  text_re --- the pattern, expressed as a TEXT object
                                348                 :                :  *  dat --- the data to match against (need not be null-terminated)
                                349                 :                :  *  dat_len --- the length of the data string
                                350                 :                :  *  cflags --- compile options for the pattern (REG_NOSUB is added if nmatch < 2)
                                351                 :                :  *  collation --- collation to use for LC_CTYPE-dependent behavior
                                352                 :                :  *  nmatch, pmatch --- optional return area for match details
                                353                 :                :  *
                                354                 :                :  * Both pattern and data are given in the database encoding.  We internally
                                355                 :                :  * convert to an array of pg_wchar, which is what Spencer's regex package wants.
                                356                 :                :  */
                                357                 :                : bool
                                358                 :        3618281 : RE_compile_and_execute(text *text_re, char *dat, int dat_len,
                                359                 :                :                        int cflags, Oid collation,
                                360                 :                :                        int nmatch, regmatch_t *pmatch)
                                361                 :                : {
                                362                 :                :     regex_t    *re;
                                363                 :                : 
                                364                 :                :     /* Fewer than two match slots means no sub-match details are wanted */
  1679 tgl@sss.pgh.pa.us         365         [ +  - ]:        3618281 :     if (nmatch < 2)
                                366                 :        3618281 :         cflags |= REG_NOSUB;
                                367                 :                : 
                                368                 :                :     /* Get the compiled RE from RE_compile_and_cache */
  5453                           369                 :        3618281 :     re = RE_compile_and_cache(text_re, cflags, collation);
                                370                 :                : 
  6935 neilc@samurai.com         371                 :        3618269 :     return RE_execute(re, dat, dat_len, nmatch, pmatch);
                                372                 :                : }
                                373                 :                : 
                                374                 :                : 
                                375                 :                : /*
                                376                 :                :  * parse_re_flags - parse the options argument of regexp_match and friends
                                377                 :                :  *
                                378                 :                :  *  flags --- output argument; cflags and glob are always (re)initialized
                                379                 :                :  *  opts --- TEXT object, or NULL for defaults (REG_ADVANCED, glob = false)
                                380                 :                :  *
                                381                 :                :  * This accepts all the options allowed by any of the callers; callers that
                                382                 :                :  * don't want some have to reject them after the fact.
                                383                 :                :  */
                                384                 :                : static void
  6695 bruce@momjian.us          385                 :         104936 : parse_re_flags(pg_re_flags *flags, text *opts)
                                386                 :                : {
                                387                 :                :     /* defaults; note the regex flavor is always folded into the compile flags */
  5989 tgl@sss.pgh.pa.us         388                 :         104936 :     flags->cflags = REG_ADVANCED;
  6791                           389                 :         104936 :     flags->glob = false;
                                390                 :                : 
  6935 neilc@samurai.com         391         [ +  + ]:         104936 :     if (opts)
                                392                 :                :     {
  6695 bruce@momjian.us          393         [ -  + ]:           2375 :         char       *opt_p = VARDATA_ANY(opts);
                                394   [ -  +  -  -  :           2375 :         int         opt_len = VARSIZE_ANY_EXHDR(opts);
                                     -  -  -  -  -  
                                                 + ]
                                395                 :                :         int         i;
                                396                 :                : 
  6935 neilc@samurai.com         397         [ +  + ]:           5362 :         for (i = 0; i < opt_len; i++)    /* options apply left to right */
                                398                 :                :         {
                                399   [ +  -  +  -  :           2999 :             switch (opt_p[i])
                                     +  +  -  -  +  
                                        -  -  +  + ]
                                400                 :                :             {
                                401                 :           2214 :                 case 'g':       /* glob is tracked separately from cflags */
                                402                 :           2214 :                     flags->glob = true;
                                403                 :           2214 :                     break;
  6695 bruce@momjian.us          404                 :UBC           0 :                 case 'b':       /* BREs: clear every other flavor bit */
  6791 tgl@sss.pgh.pa.us         405                 :              0 :                     flags->cflags &= ~(REG_ADVANCED | REG_EXTENDED | REG_QUOTE);
                                406                 :              0 :                     break;
  6695 bruce@momjian.us          407                 :CBC           5 :                 case 'c':       /* case sensitive */
  6791 tgl@sss.pgh.pa.us         408                 :              5 :                     flags->cflags &= ~REG_ICASE;
                                409                 :              5 :                     break;
  6695 bruce@momjian.us          410                 :UBC           0 :                 case 'e':       /* plain EREs */
  6791 tgl@sss.pgh.pa.us         411                 :              0 :                     flags->cflags |= REG_EXTENDED;
                                412                 :              0 :                     flags->cflags &= ~(REG_ADVANCED | REG_QUOTE);
                                413                 :              0 :                     break;
  6695 bruce@momjian.us          414                 :CBC         146 :                 case 'i':       /* case insensitive */
  6935 neilc@samurai.com         415                 :            146 :                     flags->cflags |= REG_ICASE;
                                416                 :            146 :                     break;
  6695 bruce@momjian.us          417                 :            613 :                 case 'm':       /* Perloid synonym for n */
                                418                 :                :                 case 'n':       /* \n affects ^ $ . [^ */
  6935 neilc@samurai.com         419                 :            613 :                     flags->cflags |= REG_NEWLINE;
                                420                 :            613 :                     break;
  6695 bruce@momjian.us          421                 :UBC           0 :                 case 'p':       /* ~Perl, \n affects . [^ */
  6935 neilc@samurai.com         422                 :              0 :                     flags->cflags |= REG_NLSTOP;
                                423                 :              0 :                     flags->cflags &= ~REG_NLANCH;
                                424                 :              0 :                     break;
  6695 bruce@momjian.us          425                 :              0 :                 case 'q':       /* literal string */
  6791 tgl@sss.pgh.pa.us         426                 :              0 :                     flags->cflags |= REG_QUOTE;
                                427                 :              0 :                     flags->cflags &= ~(REG_ADVANCED | REG_EXTENDED);
                                428                 :              0 :                     break;
  6695 bruce@momjian.us          429                 :CBC           6 :                 case 's':       /* single line, \n ordinary */
  6791 tgl@sss.pgh.pa.us         430                 :              6 :                     flags->cflags &= ~REG_NEWLINE;
                                431                 :              6 :                     break;
  6695 bruce@momjian.us          432                 :UBC           0 :                 case 't':       /* tight syntax */
  6791 tgl@sss.pgh.pa.us         433                 :              0 :                     flags->cflags &= ~REG_EXPANDED;
                                434                 :              0 :                     break;
  6695 bruce@momjian.us          435                 :              0 :                 case 'w':       /* weird, \n affects ^ $ only */
  6935 neilc@samurai.com         436                 :              0 :                     flags->cflags &= ~REG_NLSTOP;
                                437                 :              0 :                     flags->cflags |= REG_NLANCH;
                                438                 :              0 :                     break;
  6695 bruce@momjian.us          439                 :CBC           3 :                 case 'x':       /* expanded syntax */
  6935 neilc@samurai.com         440                 :              3 :                     flags->cflags |= REG_EXPANDED;
                                441                 :              3 :                     break;
                                442                 :             12 :                 default:        /* unrecognized option character */
                                443         [ +  - ]:             12 :                     ereport(ERROR,
                                444                 :                :                             (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
                                445                 :                :                              errmsg("invalid regular expression option: \"%.*s\"",
                                446                 :                :                                     pg_mblen_range(opt_p + i, opt_p + opt_len), opt_p + i)));   /* presumably the byte length of the offending character -- confirm */
                                447                 :                :                     break;
                                448                 :                :             }
                                449                 :                :         }
                                450                 :                :     }
                                451                 :         104924 : }
                                452                 :                : 
                                453                 :                : 
                                454                 :                : /*
                                455                 :                :  *  interface routines called by the function manager
                                456                 :                :  */
                                457                 :                : 
                                458                 :                : Datum
  9383 tgl@sss.pgh.pa.us         459                 :        3385667 : nameregexeq(PG_FUNCTION_ARGS)
                                460                 :                : {
                                461                 :        3385667 :     Name        n = PG_GETARG_NAME(0);
  6750                           462                 :        3385667 :     text       *p = PG_GETARG_TEXT_PP(1);
                                463                 :                :     /* Does name n match pattern p?  No match details are requested. */
  8439                           464                 :        3385667 :     PG_RETURN_BOOL(RE_compile_and_execute(p,
                                465                 :                :                                           NameStr(*n),
                                466                 :                :                                           strlen(NameStr(*n)),
                                467                 :                :                                           REG_ADVANCED,
                                468                 :                :                                           PG_GET_COLLATION(),
                                469                 :                :                                           0, NULL));
                                470                 :                : }
                                471                 :                : 
                                472                 :                : Datum
  9383                           473                 :          14293 : nameregexne(PG_FUNCTION_ARGS)
                                474                 :                : {
                                475                 :          14293 :     Name        n = PG_GETARG_NAME(0);
  6750                           476                 :          14293 :     text       *p = PG_GETARG_TEXT_PP(1);
                                477                 :                :     /* Negated match: true when name n does not match pattern p. */
  8439                           478                 :          14293 :     PG_RETURN_BOOL(!RE_compile_and_execute(p,
                                479                 :                :                                            NameStr(*n),
                                480                 :                :                                            strlen(NameStr(*n)),
                                481                 :                :                                            REG_ADVANCED,
                                482                 :                :                                            PG_GET_COLLATION(),
                                483                 :                :                                            0, NULL));
                                484                 :                : }
                                485                 :                : 
                                486                 :                : Datum
  9383                           487                 :         197083 : textregexeq(PG_FUNCTION_ARGS)
                                488                 :                : {
  6750                           489                 :         197083 :     text       *s = PG_GETARG_TEXT_PP(0);
                                490                 :         197083 :     text       *p = PG_GETARG_TEXT_PP(1);
                                491                 :                :     /* Does text s match pattern p?  No match details are requested. */
  8439                           492   [ -  +  -  -  :         197083 :     PG_RETURN_BOOL(RE_compile_and_execute(p,
                                     -  -  -  -  +  
                                           +  +  + ]
                                493                 :                :                                           VARDATA_ANY(s),
                                494                 :                :                                           VARSIZE_ANY_EXHDR(s),
                                495                 :                :                                           REG_ADVANCED,
                                496                 :                :                                           PG_GET_COLLATION(),
                                497                 :                :                                           0, NULL));
                                498                 :                : }
                                499                 :                : 
                                500                 :                : Datum
  9383                           501                 :          17079 : textregexne(PG_FUNCTION_ARGS)
                                502                 :                : {
  6750                           503                 :          17079 :     text       *s = PG_GETARG_TEXT_PP(0);
                                504                 :          17079 :     text       *p = PG_GETARG_TEXT_PP(1);
                                505                 :                :     /* Negated match: true when text s does not match pattern p. */
  8439                           506   [ -  +  -  -  :          17079 :     PG_RETURN_BOOL(!RE_compile_and_execute(p,
                                     -  -  -  -  +  
                                           +  +  + ]
                                507                 :                :                                            VARDATA_ANY(s),
                                508                 :                :                                            VARSIZE_ANY_EXHDR(s),
                                509                 :                :                                            REG_ADVANCED,
                                510                 :                :                                            PG_GET_COLLATION(),
                                511                 :                :                                            0, NULL));
                                512                 :                : }
                                513                 :                : 
                                514                 :                : 
                                515                 :                : /*
                                516                 :                :  *  Case-insensitive variants of the routines above.
                                517                 :                :  *  For these, we pass the REG_ICASE flag on to pg_regcomp.
                                518                 :                :  */
                                519                 :                : 
                                520                 :                : 
                                521                 :                : Datum
                                522                 :           3762 : nameicregexeq(PG_FUNCTION_ARGS)
                                523                 :                : {
                                524                 :           3762 :     Name        n = PG_GETARG_NAME(0);
  6750                           525                 :           3762 :     text       *p = PG_GETARG_TEXT_PP(1);
                                526                 :                :     /* Does name n match pattern p, with REG_ICASE added to the flags? */
  8439                           527                 :           3762 :     PG_RETURN_BOOL(RE_compile_and_execute(p,
                                528                 :                :                                           NameStr(*n),
                                529                 :                :                                           strlen(NameStr(*n)),
                                530                 :                :                                           REG_ADVANCED | REG_ICASE,
                                531                 :                :                                           PG_GET_COLLATION(),
                                532                 :                :                                           0, NULL));
                                533                 :                : }
                                534                 :                : 
                                535                 :                : Datum
                                536                 :              3 : nameicregexne(PG_FUNCTION_ARGS)
                                537                 :                : {
                                538                 :              3 :     Name        n = PG_GETARG_NAME(0);
  6750                           539                 :              3 :     text       *p = PG_GETARG_TEXT_PP(1);
                                540                 :                :     /* Negated REG_ICASE match: true when name n does not match pattern p. */
  8439                           541                 :              3 :     PG_RETURN_BOOL(!RE_compile_and_execute(p,
                                542                 :                :                                            NameStr(*n),
                                543                 :                :                                            strlen(NameStr(*n)),
                                544                 :                :                                            REG_ADVANCED | REG_ICASE,
                                545                 :                :                                            PG_GET_COLLATION(),
                                546                 :                :                                            0, NULL));
                                547                 :                : }
                                548                 :                : 
                                549                 :                : Datum
                                550                 :            230 : texticregexeq(PG_FUNCTION_ARGS)
                                551                 :                : {
  6750                           552                 :            230 :     text       *s = PG_GETARG_TEXT_PP(0);
                                553                 :            230 :     text       *p = PG_GETARG_TEXT_PP(1);
                                554                 :                :     /* Does text s match pattern p, with REG_ICASE added to the flags? */
  8439                           555   [ -  +  -  -  :            230 :     PG_RETURN_BOOL(RE_compile_and_execute(p,
                                     -  -  -  -  +  
                                           +  +  + ]
                                556                 :                :                                           VARDATA_ANY(s),
                                557                 :                :                                           VARSIZE_ANY_EXHDR(s),
                                558                 :                :                                           REG_ADVANCED | REG_ICASE,
                                559                 :                :                                           PG_GET_COLLATION(),
                                560                 :                :                                           0, NULL));
                                561                 :                : }
                                562                 :                : 
                                563                 :                : Datum
                                564                 :             14 : texticregexne(PG_FUNCTION_ARGS)
                                565                 :                : {
  6750                           566                 :             14 :     text       *s = PG_GETARG_TEXT_PP(0);
                                567                 :             14 :     text       *p = PG_GETARG_TEXT_PP(1);
                                568                 :                :     /* Negated REG_ICASE match: true when text s does not match pattern p. */
  8439                           569   [ -  +  -  -  :             14 :     PG_RETURN_BOOL(!RE_compile_and_execute(p,
                                     -  -  -  -  +  
                                           +  +  + ]
                                570                 :                :                                            VARDATA_ANY(s),
                                571                 :                :                                            VARSIZE_ANY_EXHDR(s),
                                572                 :                :                                            REG_ADVANCED | REG_ICASE,
                                573                 :                :                                            PG_GET_COLLATION(),
                                574                 :                :                                            0, NULL));
                                575                 :                : }
                                576                 :                : 
                                577                 :                : 
                                578                 :                : /*
                                579                 :                :  * textregexsubstr()
                                580                 :                :  *      Return a substring matched by a regular expression.
                                581                 :                :  */
                                582                 :                : Datum
 8678 lockhart@fourpalms.o      583                 :            823 : textregexsubstr(PG_FUNCTION_ARGS)
                                584                 :                : {
 6750 tgl@sss.pgh.pa.us         585                 :            823 :     text       *s = PG_GETARG_TEXT_PP(0);
                                586                 :            823 :     text       *p = PG_GETARG_TEXT_PP(1);
                                587                 :                :     regex_t    *re;
                                588                 :                :     regmatch_t  pmatch[2];
                                589                 :                :     int         so,
                                590                 :                :                 eo;
                                591                 :                : 
                                592                 :                :     /* Compile RE */
 5453                           593                 :            823 :     re = RE_compile_and_cache(p, REG_ADVANCED, PG_GET_COLLATION());
                                594                 :                : 
                                595                 :                :     /*
                                596                 :                :      * We pass two regmatch_t structs to get info about the overall match and
                                597                 :                :      * the match for the first parenthesized subexpression (if any). If there
                                598                 :                :      * is a parenthesized subexpression, we return what it matched; else
                                599                 :                :      * return what the whole regexp matched.
                                600                 :                :      */
 6570                           601         [ +  + ]:            823 :     if (!RE_execute(re,
                                602   [ -  +  -  -  :            823 :                     VARDATA_ANY(s), VARSIZE_ANY_EXHDR(s),
                                     -  -  -  -  -  
                                           +  -  + ]
                                603                 :                :                     2, pmatch))
                                604                 :              6 :         PG_RETURN_NULL();       /* definitely no match */
                                605                 :                : 
                                606         [ +  + ]:            817 :     if (re->re_nsub > 0)
                                607                 :                :     {
                                608                 :                :         /* has parenthesized subexpressions, use the first one */
 8575                           609                 :            761 :         so = pmatch[1].rm_so;
                                610                 :            761 :         eo = pmatch[1].rm_eo;
                                611                 :                :     }
                                612                 :                :     else
                                613                 :                :     {
                                614                 :                :         /* no parenthesized subexpression, use whole match */
 6570                           615                 :             56 :         so = pmatch[0].rm_so;
                                616                 :             56 :         eo = pmatch[0].rm_eo;
                                617                 :                :     }
                                618                 :                : 
                                619                 :                :     /*
                                620                 :                :      * It is possible to have a match to the whole pattern but no match for a
                                621                 :                :      * subexpression; for example 'foo(bar)?' is considered to match 'foo' but
                                622                 :                :      * there is no subexpression match.  So this extra test for match failure
                                623                 :                :      * is not redundant.
                                624                 :                :      */
                                625   [ +  +  -  + ]:            817 :     if (so < 0 || eo < 0)
                                626                 :              3 :         PG_RETURN_NULL();
                                627                 :                : 
                                628                 :            814 :     return DirectFunctionCall3(text_substr,
                                629                 :                :                                PointerGetDatum(s),
                                630                 :                :                                Int32GetDatum(so + 1),
                                631                 :                :                                Int32GetDatum(eo - so));
                                632                 :                : }
                                633                 :                : 
                                634                 :                : /*
                                635                 :                :  * textregexreplace_noopt()
                                636                 :                :  *      Return the source string with text matched by a regular expression replaced.
                                637                 :                :  *
                                638                 :                :  * This version doesn't have an option argument: we default to case
                                639                 :                :  * sensitive match, replace the first instance only.
                                640                 :                :  */
                                641                 :                : Datum
 7553 bruce@momjian.us          642                 :           7218 : textregexreplace_noopt(PG_FUNCTION_ARGS)
                                643                 :                : {
 6750 tgl@sss.pgh.pa.us         644                 :           7218 :     text       *s = PG_GETARG_TEXT_PP(0);
                                645                 :           7218 :     text       *p = PG_GETARG_TEXT_PP(1);
                                646                 :           7218 :     text       *r = PG_GETARG_TEXT_PP(2);
                                647                 :                : 
 1679                           648                 :           7218 :     PG_RETURN_TEXT_P(replace_text_regexp(s, p, r,
                                649                 :                :                                          REG_ADVANCED, PG_GET_COLLATION(),
                                650                 :                :                                          0, 1));
                                651                 :                : }
                                652                 :                : 
                                653                 :                : /*
                                654                 :                :  * textregexreplace()
                                655                 :                :  *      Return the source string with text matched by a regular expression replaced.
                                656                 :                :  */
                                657                 :                : Datum
 7553 bruce@momjian.us          658                 :           2178 : textregexreplace(PG_FUNCTION_ARGS)
                                659                 :                : {
 6750 tgl@sss.pgh.pa.us         660                 :           2178 :     text       *s = PG_GETARG_TEXT_PP(0);
                                661                 :           2178 :     text       *p = PG_GETARG_TEXT_PP(1);
                                662                 :           2178 :     text       *r = PG_GETARG_TEXT_PP(2);
                                663                 :           2178 :     text       *opt = PG_GETARG_TEXT_PP(3);
                                664                 :                :     pg_re_flags flags;
                                665                 :                : 
                                666                 :                :     /*
                                667                 :                :      * regexp_replace() with four arguments will be preferentially resolved as
                                668                 :                :      * this form when the fourth argument is of type UNKNOWN.  However, the
                                669                 :                :      * user might have intended to call textregexreplace_extended_no_n.  If we
                                670                 :                :      * see flags that look like an integer, emit the same error that
                                671                 :                :      * parse_re_flags would, but add a HINT about how to fix it.
                                672                 :                :      */
 1685                           673   [ +  -  -  -  :           2178 :     if (VARSIZE_ANY_EXHDR(opt) > 0)
                                     -  -  -  -  -  
                                           +  +  - ]
                                674                 :                :     {
                                675         [ -  + ]:           2178 :         char       *opt_p = VARDATA_ANY(opt);
   67 tmunro@postgresql.or      676   [ -  +  -  -  :           2178 :         const char *end_p = opt_p + VARSIZE_ANY_EXHDR(opt);
                                     -  -  -  -  -  
                                                 + ]
                                677                 :                : 
 1685 tgl@sss.pgh.pa.us         678   [ +  -  +  + ]:           2178 :         if (*opt_p >= '0' && *opt_p <= '9')
                                679         [ +  - ]:              3 :             ereport(ERROR,
                                680                 :                :                     (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
                                681                 :                :                      errmsg("invalid regular expression option: \"%.*s\"",
                                682                 :                :                             pg_mblen_range(opt_p, end_p), opt_p),
                                683                 :                :                      errhint("If you meant to use regexp_replace() with a start parameter, cast the fourth argument to integer explicitly.")));
                                684                 :                :     }
                                685                 :                : 
 6791                           686                 :           2175 :     parse_re_flags(&flags, opt);
                                687                 :                : 
 1679                           688                 :           2172 :     PG_RETURN_TEXT_P(replace_text_regexp(s, p, r,
                                689                 :                :                                          flags.cflags, PG_GET_COLLATION(),
                                690                 :                :                                          0, flags.glob ? 0 : 1));
                                691                 :                : }
                                692                 :                : 
                                693                 :                : /*
                                694                 :                :  * textregexreplace_extended()
                                695                 :                :  *      Return a string matched by a regular expression, with replacement.
                                696                 :                :  *      Extends textregexreplace by allowing a start position and the
                                697                 :                :  *      choice of the occurrence to replace (0 means all occurrences).
                                698                 :                :  */
                                699                 :                : Datum
 1685                           700                 :             33 : textregexreplace_extended(PG_FUNCTION_ARGS)
                                701                 :                : {
                                702                 :             33 :     text       *s = PG_GETARG_TEXT_PP(0);
                                703                 :             33 :     text       *p = PG_GETARG_TEXT_PP(1);
                                704                 :             33 :     text       *r = PG_GETARG_TEXT_PP(2);
                                705                 :             33 :     int         start = 1;
                                706                 :             33 :     int         n = 1;
                                707         [ +  + ]:             33 :     text       *flags = PG_GETARG_TEXT_PP_IF_EXISTS(5);
                                708                 :                :     pg_re_flags re_flags;
                                709                 :                : 
                                710                 :                :     /* Collect optional parameters */
                                711         [ +  - ]:             33 :     if (PG_NARGS() > 3)
                                712                 :                :     {
                                713                 :             33 :         start = PG_GETARG_INT32(3);
                                714         [ +  + ]:             33 :         if (start <= 0)
                                715         [ +  - ]:              3 :             ereport(ERROR,
                                716                 :                :                     (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
                                717                 :                :                      errmsg("invalid value for parameter \"%s\": %d",
                                718                 :                :                             "start", start)));
                                719                 :                :     }
                                720         [ +  + ]:             30 :     if (PG_NARGS() > 4)
                                721                 :                :     {
                                722                 :             27 :         n = PG_GETARG_INT32(4);
                                723         [ +  + ]:             27 :         if (n < 0)
                                724         [ +  - ]:              3 :             ereport(ERROR,
                                725                 :                :                     (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
                                726                 :                :                      errmsg("invalid value for parameter \"%s\": %d",
                                727                 :                :                             "n", n)));
                                728                 :                :     }
                                729                 :                : 
                                730                 :                :     /* Determine options */
                                731                 :             27 :     parse_re_flags(&re_flags, flags);
                                732                 :                : 
                                733                 :                :     /* If N was not specified, deduce it from the 'g' flag */
                                734         [ +  + ]:             27 :     if (PG_NARGS() <= 4)
                                735                 :              3 :         n = re_flags.glob ? 0 : 1;
                                736                 :                : 
                                737                 :                :     /* Do the replacement(s) */
 1679                           738                 :             27 :     PG_RETURN_TEXT_P(replace_text_regexp(s, p, r,
                                739                 :                :                                          re_flags.cflags, PG_GET_COLLATION(),
                                740                 :                :                                          start - 1, n));
                                741                 :                : }
                                742                 :                : 
                                743                 :                : /* This is separate to keep the opr_sanity regression test from complaining */
                                744                 :                : Datum
 1685                           745                 :              3 : textregexreplace_extended_no_n(PG_FUNCTION_ARGS)
                                746                 :                : {
                                747                 :              3 :     return textregexreplace_extended(fcinfo);
                                748                 :                : }
                                749                 :                : 
                                750                 :                : /* This is separate to keep the opr_sanity regression test from complaining */
                                751                 :                : Datum
                                752                 :              3 : textregexreplace_extended_no_flags(PG_FUNCTION_ARGS)
                                753                 :                : {
                                754                 :              3 :     return textregexreplace_extended(fcinfo);
                                755                 :                : }
                                756                 :                : 
                                757                 :                : /*
                                758                 :                :  * similar_to_escape(), similar_escape()
                                759                 :                :  *
                                760                 :                :  * Convert a SQL "SIMILAR TO" regexp pattern to POSIX style, so it can be
                                761                 :                :  * used by our regexp engine.
                                762                 :                :  *
                                763                 :                :  * similar_escape_internal() is the common workhorse for three SQL-exposed
                                764                 :                :  * functions.  esc_text can be passed as NULL to select the default escape
                                765                 :                :  * (which is '\'), or as an empty string to select no escape character.
                                766                 :                :  */
                                767                 :                : static text *
 2381                           768                 :             96 : similar_escape_internal(text *pat_text, text *esc_text)
                                769                 :                : {
                                770                 :                :     text       *result;
                                771                 :                :     char       *p,
                                772                 :                :                *e,
                                773                 :                :                *r;
                                774                 :                :     int         plen,
                                775                 :                :                 elen;
                                776                 :                :     const char *pend;
 8575                           777                 :             96 :     bool        afterescape = false;
                                778                 :             96 :     int         nquotes = 0;
  183                           779                 :             96 :     int         bracket_depth = 0;  /* square bracket nesting level */
                                780                 :             96 :     int         charclass_pos = 0;  /* position inside a character class */
                                781                 :                : 
 6750                           782         [ -  + ]:             96 :     p = VARDATA_ANY(pat_text);
                                783   [ -  +  -  -  :             96 :     plen = VARSIZE_ANY_EXHDR(pat_text);
                                     -  -  -  -  -  
                                                 + ]
   67 tmunro@postgresql.or      784                 :             96 :     pend = p + plen;
 2381 tgl@sss.pgh.pa.us         785         [ +  + ]:             96 :     if (esc_text == NULL)
                                786                 :                :     {
                                787                 :                :         /* No ESCAPE clause provided; default to backslash as escape */
 8575                           788                 :             44 :         e = "\\";
                                789                 :             44 :         elen = 1;
                                790                 :                :     }
                                791                 :                :     else
                                792                 :                :     {
 6750                           793         [ -  + ]:             52 :         e = VARDATA_ANY(esc_text);
                                794   [ -  +  -  -  :             52 :         elen = VARSIZE_ANY_EXHDR(esc_text);
                                     -  -  -  -  -  
                                                 + ]
 8575                           795         [ +  + ]:             52 :         if (elen == 0)
                                796                 :              3 :             e = NULL;           /* no escape character */
 2381                           797         [ +  + ]:             49 :         else if (elen > 1)
                                798                 :                :         {
 4218 jdavis@postgresql.or      799                 :              6 :             int         escape_mblen = pg_mbstrlen_with_len(e, elen);
                                800                 :                : 
                                801         [ +  + ]:              6 :             if (escape_mblen > 1)
                                802         [ +  - ]:              3 :                 ereport(ERROR,
                                803                 :                :                         (errcode(ERRCODE_INVALID_ESCAPE_SEQUENCE),
                                804                 :                :                          errmsg("invalid escape string"),
                                805                 :                :                          errhint("Escape string must be empty or one character.")));
                                806                 :                :         }
                                807                 :                :     }
                                808                 :                : 
                                809                 :                :     /*----------
                                810                 :                :      * We surround the transformed input string with
                                811                 :                :      *          ^(?: ... )$
                                812                 :                :      * which requires some explanation.  We need "^" and "$" to force
                                813                 :                :      * the pattern to match the entire input string as per the SQL spec.
                                814                 :                :      * The "(?:" and ")" are a non-capturing set of parens; we have to have
                                815                 :                :      * parens in case the string contains "|", else the "^" and "$" will
                                816                 :                :      * be bound into the first and last alternatives which is not what we
                                817                 :                :      * want, and the parens must be non-capturing because we don't want them
                                818                 :                :      * to count when selecting output for SUBSTRING.
                                819                 :                :      *
                                820                 :                :      * When the pattern is divided into three parts by escape-double-quotes,
                                821                 :                :      * what we emit is
                                822                 :                :      *          ^(?:part1){1,1}?(part2){1,1}(?:part3)$
                                823                 :                :      * which requires even more explanation.  The "{1,1}?" on part1 makes it
                                824                 :                :      * non-greedy so that it will match the smallest possible amount of text
                                825                 :                :      * not the largest, as required by SQL.  The plain parens around part2
                                826                 :                :      * are capturing parens so that that part is what controls the result of
                                827                 :                :      * SUBSTRING.  The "{1,1}" forces part2 to be greedy, so that it matches
                                828                 :                :      * the largest possible amount of text; hence part3 must match the
                                829                 :                :      * smallest amount of text, as required by SQL.  We don't need an explicit
                                830                 :                :      * greediness marker on part3.  Note that this also confines the effects
                                831                 :                :      * of any "|" characters to the respective part, which is what we want.
                                832                 :                :      *
                                833                 :                :      * The SQL spec says that SUBSTRING's pattern must contain exactly two
                                834                 :                :      * escape-double-quotes, but we only complain if there are more than two.
                                835                 :                :      * With none, we act as though part1 and part3 are empty; with one, we
                                836                 :                :      * act as though part3 is empty.  Both behaviors fall out of omitting
                                837                 :                :      * the relevant part separators in the above expansion.  If the result
                                838                 :                :      * of this function is used in a plain regexp match (SIMILAR TO), the
                                839                 :                :      * escape-double-quotes have no effect on the match behavior.
                                840                 :                :      *
                                841                 :                :      * While we don't fully validate character classes (bracket expressions),
                                842                 :                :      * we do need to parse them well enough to know where they end.
                                843                 :                :      * "charclass_pos" tracks where we are in a character class.
                                844                 :                :      * Its value is uninteresting when bracket_depth is 0.
                                845                 :                :      * But when bracket_depth > 0, it will be
                                846                 :                :      *   1: right after the opening '[' (a following '^' will negate
                                847                 :                :      *      the class, while ']' is a literal character)
                                848                 :                :      *   2: right after a '^' after the opening '[' (']' is still a literal
                                849                 :                :      *      character)
                                850                 :                :      *   3 or more: further inside the character class (']' ends the class)
                                851                 :                :      *----------
                                852                 :                :      */
                                853                 :                : 
                                854                 :                :     /*
                                855                 :                :      * We need room for the prefix/postfix and part separators, plus as many
                                856                 :                :      * as 3 output bytes per input byte; since the input is at most 1GB this
                                857                 :                :      * can't overflow size_t.
                                858                 :                :      */
 2497 tgl@sss.pgh.pa.us         859                 :             93 :     result = (text *) palloc(VARHDRSZ + 23 + 3 * (size_t) plen);
 8575                           860                 :             93 :     r = VARDATA(result);
                                861                 :                : 
                                862                 :             93 :     *r++ = '^';
 7276                           863                 :             93 :     *r++ = '(';
                                864                 :             93 :     *r++ = '?';
                                865                 :             93 :     *r++ = ':';
                                866                 :                : 
 8575                           867         [ +  + ]:            926 :     while (plen > 0)
                                868                 :                :     {
 7456 bruce@momjian.us          869                 :            836 :         char        pchar = *p;
                                870                 :                : 
                                871                 :                :         /*
                                872                 :                :          * If both the escape character and the current character from the
                                873                 :                :          * pattern are multi-byte, we need to take the slow path.
                                874                 :                :          *
                                875                 :                :          * But if one of them is single-byte, we can process the pattern one
                                876                 :                :          * byte at a time, ignoring multi-byte characters.  (This works
                                877                 :                :          * because all server-encodings have the property that a valid
                                878                 :                :          * multi-byte character representation cannot contain the
                                879                 :                :          * representation of a valid single-byte character.)
                                880                 :                :          */
                                881                 :                : 
 4218 jdavis@postgresql.or      882         [ +  + ]:            836 :         if (elen > 1)
                                883                 :                :         {
   67 tmunro@postgresql.or      884                 :              3 :             int         mblen = pg_mblen_range(p, pend);
                                885                 :                : 
 4218 jdavis@postgresql.or      886         [ +  - ]:              3 :             if (mblen > 1)
                                887                 :                :             {
                                888                 :                :                 /* slow, multi-byte path */
                                889         [ -  + ]:              3 :                 if (afterescape)
                                890                 :                :                 {
 4218 jdavis@postgresql.or      891                 :UBC           0 :                     *r++ = '\\';
                                892                 :              0 :                     memcpy(r, p, mblen);
                                893                 :              0 :                     r += mblen;
                                894                 :              0 :                     afterescape = false;
                                895                 :                :                 }
 4218 jdavis@postgresql.or      896   [ +  -  +  -  :CBC           3 :                 else if (e && elen == mblen && memcmp(e, p, mblen) == 0)
                                              -  + ]
                                897                 :                :                 {
                                898                 :                :                     /* SQL escape character; do not send to output */
 4218 jdavis@postgresql.or      899                 :UBC           0 :                     afterescape = true;
                                900                 :                :                 }
                                901                 :                :                 else
                                902                 :                :                 {
                                903                 :                :                     /*
                                904                 :                :                      * We know it's a multi-byte character, so we don't need
                                905                 :                :                      * to do all the comparisons to single-byte characters
                                906                 :                :                      * that we do below.
                                907                 :                :                      */
 4218 jdavis@postgresql.or      908                 :CBC           3 :                     memcpy(r, p, mblen);
                                909                 :              3 :                     r += mblen;
                                910                 :                :                 }
                                911                 :                : 
                                912                 :              3 :                 p += mblen;
                                913                 :              3 :                 plen -= mblen;
                                914                 :                : 
                                915                 :              3 :                 continue;
                                916                 :                :             }
                                917                 :                :         }
                                918                 :                : 
                                919                 :                :         /* fast path */
 8575 tgl@sss.pgh.pa.us         920         [ +  + ]:            833 :         if (afterescape)
                                921                 :                :         {
  183                           922   [ +  +  +  - ]:             83 :             if (pchar == '"' && bracket_depth < 1)  /* escape-double-quote? */
                                923                 :                :             {
                                924                 :                :                 /* emit appropriate part separator, per notes above */
 2497                           925         [ +  + ]:             62 :                 if (nquotes == 0)
                                926                 :                :                 {
                                927                 :             31 :                     *r++ = ')';
                                928                 :             31 :                     *r++ = '{';
                                929                 :             31 :                     *r++ = '1';
                                930                 :             31 :                     *r++ = ',';
                                931                 :             31 :                     *r++ = '1';
                                932                 :             31 :                     *r++ = '}';
                                933                 :             31 :                     *r++ = '?';
                                934                 :             31 :                     *r++ = '(';
                                935                 :                :                 }
                                936         [ +  + ]:             31 :                 else if (nquotes == 1)
                                937                 :                :                 {
                                938                 :             28 :                     *r++ = ')';
                                939                 :             28 :                     *r++ = '{';
                                940                 :             28 :                     *r++ = '1';
                                941                 :             28 :                     *r++ = ',';
                                942                 :             28 :                     *r++ = '1';
                                943                 :             28 :                     *r++ = '}';
                                944                 :             28 :                     *r++ = '(';
                                945                 :             28 :                     *r++ = '?';
                                946                 :             28 :                     *r++ = ':';
                                947                 :                :                 }
                                948                 :                :                 else
                                949         [ +  - ]:              3 :                     ereport(ERROR,
                                950                 :                :                             (errcode(ERRCODE_INVALID_USE_OF_ESCAPE_CHARACTER),
                                951                 :                :                              errmsg("SQL regular expression may not contain more than two escape-double-quote separators")));
                                952                 :             59 :                 nquotes++;
                                953                 :                :             }
                                954                 :                :             else
                                955                 :                :             {
                                956                 :                :                 /*
                                957                 :                :                  * We allow any character at all to be escaped; notably, this
                                958                 :                :                  * allows access to POSIX character-class escapes such as
                                959                 :                :                  * "\d".  The SQL spec is considerably more restrictive.
                                960                 :                :                  */
 8575                           961                 :             21 :                 *r++ = '\\';
                                962                 :             21 :                 *r++ = pchar;
                                963                 :                : 
                                964                 :                :                 /*
                                965                 :                :                  * If we encounter an escaped character in a character class,
                                966                 :                :                  * we are no longer at the beginning.
                                967                 :                :                  */
  183                           968                 :             21 :                 charclass_pos = 3;
                                969                 :                :             }
 8575                           970                 :             80 :             afterescape = false;
                                971                 :                :         }
                                972   [ +  +  +  + ]:            750 :         else if (e && pchar == *e)
                                973                 :                :         {
                                974                 :                :             /* SQL escape character; do not send to output */
                                975                 :             83 :             afterescape = true;
                                976                 :                :         }
  183                           977         [ +  + ]:            667 :         else if (bracket_depth > 0)
                                978                 :                :         {
                                979                 :                :             /* inside a character class */
 5916                           980         [ -  + ]:            306 :             if (pchar == '\\')
                                981                 :                :             {
                                982                 :                :                 /*
                                983                 :                :                  * If we're here, backslash is not the SQL escape character,
                                984                 :                :                  * so treat it as a literal class element, which requires
                                985                 :                :                  * doubling it.  (This matches our behavior for backslashes
                                986                 :                :                  * outside character classes.)
                                987                 :                :                  */
 5916 tgl@sss.pgh.pa.us         988                 :UBC           0 :                 *r++ = '\\';
                                989                 :                :             }
 5916 tgl@sss.pgh.pa.us         990                 :CBC         306 :             *r++ = pchar;
                                991                 :                : 
                                992                 :                :             /* parse the character class well enough to identify ending ']' */
  183                           993   [ +  +  +  + ]:            306 :             if (pchar == ']' && charclass_pos > 2)
                                994                 :                :             {
                                995                 :                :                 /* found the real end of a bracket pair */
                                996                 :             69 :                 bracket_depth--;
                                997                 :                :                 /* don't reset charclass_pos, this may be an inner bracket */
                                998                 :                :             }
  291 michael@paquier.xyz       999         [ +  + ]:            237 :             else if (pchar == '[')
                               1000                 :                :             {
                               1001                 :                :                 /* start of a nested bracket pair */
  183 tgl@sss.pgh.pa.us        1002                 :             36 :                 bracket_depth++;
                               1003                 :                : 
                               1004                 :                :                 /*
                               1005                 :                :                  * We are no longer at the beginning of a character class.
                               1006                 :                :                  * (The nested bracket pair is a collating element, not a
                               1007                 :                :                  * character class in its own right.)
                               1008                 :                :                  */
                               1009                 :             36 :                 charclass_pos = 3;
                               1010                 :                :             }
                               1011         [ +  + ]:            201 :             else if (pchar == '^')
                               1012                 :                :             {
                               1013                 :                :                 /*
                               1014                 :                :                  * A caret right after the opening bracket negates the
                               1015                 :                :                  * character class.  In that case, the following will
                               1016                 :                :                  * increment charclass_pos from 1 to 2, so that a following
                               1017                 :                :                  * ']' is still a literal character and does not end the
                               1018                 :                :                  * character class.  If we are further inside a character
                               1019                 :                :                  * class, charclass_pos might get incremented past 3, which is
                               1020                 :                :                  * fine.
                               1021                 :                :                  */
                               1022                 :             30 :                 charclass_pos++;
                               1023                 :                :             }
                               1024                 :                :             else
                               1025                 :                :             {
                               1026                 :                :                 /*
                               1027                 :                :                  * Anything else (including a backslash or leading ']') is an
                               1028                 :                :                  * element of the character class, so we are no longer at the
                               1029                 :                :                  * beginning of the class.
                               1030                 :                :                  */
                               1031                 :            171 :                 charclass_pos = 3;
                               1032                 :                :             }
                               1033                 :                :         }
 5916                          1034         [ +  + ]:            361 :         else if (pchar == '[')
                               1035                 :                :         {
                               1036                 :                :             /* start of a character class */
                               1037                 :             33 :             *r++ = pchar;
  183                          1038                 :             33 :             bracket_depth = 1;
                               1039                 :             33 :             charclass_pos = 1;
                               1040                 :                :         }
 8575                          1041         [ +  + ]:            328 :         else if (pchar == '%')
                               1042                 :                :         {
                               1043                 :             66 :             *r++ = '.';
                               1044                 :             66 :             *r++ = '*';
                               1045                 :                :         }
                               1046         [ +  + ]:            262 :         else if (pchar == '_')
                               1047                 :             32 :             *r++ = '.';
 5916                          1048         [ +  + ]:            230 :         else if (pchar == '(')
                               1049                 :                :         {
                               1050                 :                :             /* convert to non-capturing parenthesis */
                               1051                 :             15 :             *r++ = '(';
                               1052                 :             15 :             *r++ = '?';
                               1053                 :             15 :             *r++ = ':';
                               1054                 :                :         }
 6000                          1055   [ +  +  +  +  :            215 :         else if (pchar == '\\' || pchar == '.' ||
                                              +  + ]
                               1056         [ +  + ]:            195 :                  pchar == '^' || pchar == '$')
                               1057                 :                :         {
 8575                          1058                 :             26 :             *r++ = '\\';
                               1059                 :             26 :             *r++ = pchar;
                               1060                 :                :         }
                               1061                 :                :         else
                               1062                 :            189 :             *r++ = pchar;
                               1063                 :            830 :         p++, plen--;
                               1064                 :                :     }
                               1065                 :                : 
 7276                          1066                 :             90 :     *r++ = ')';
 8575                          1067                 :             90 :     *r++ = '$';
                               1068                 :                : 
 6956                          1069                 :             90 :     SET_VARSIZE(result, r - ((char *) result));
                               1070                 :                : 
 2381                          1071                 :             90 :     return result;
                               1072                 :                : }
                               1073                 :                : 
                               1074                 :                : /*
                               1075                 :                :  * similar_to_escape(pattern, escape)
                               1076                 :                :  */
                               1077                 :                : Datum
                               1078                 :             52 : similar_to_escape_2(PG_FUNCTION_ARGS)
                               1079                 :                : {
                               1080                 :             52 :     text       *pat_text = PG_GETARG_TEXT_PP(0);
                               1081                 :             52 :     text       *esc_text = PG_GETARG_TEXT_PP(1);
                               1082                 :                :     text       *result;
                               1083                 :                : 
                               1084                 :             52 :     result = similar_escape_internal(pat_text, esc_text);
                               1085                 :                : 
                               1086                 :             46 :     PG_RETURN_TEXT_P(result);
                               1087                 :                : }
                               1088                 :                : 
                               1089                 :                : /*
                               1090                 :                :  * similar_to_escape(pattern)
                               1091                 :                :  * Inserts a default escape character.
                               1092                 :                :  */
                               1093                 :                : Datum
                               1094                 :             44 : similar_to_escape_1(PG_FUNCTION_ARGS)
                               1095                 :                : {
                               1096                 :             44 :     text       *pat_text = PG_GETARG_TEXT_PP(0);
                               1097                 :                :     text       *result;
                               1098                 :                : 
                               1099                 :             44 :     result = similar_escape_internal(pat_text, NULL);
                               1100                 :                : 
                               1101                 :             44 :     PG_RETURN_TEXT_P(result);
                               1102                 :                : }
                               1103                 :                : 
                               1104                 :                : /*
                               1105                 :                :  * similar_escape(pattern, escape)
                               1106                 :                :  *
                               1107                 :                :  * Legacy function for compatibility with views stored using the
                               1108                 :                :  * pre-v13 expansion of SIMILAR TO.  Unlike the above functions, this
                               1109                 :                :  * is non-strict, which leads to not-per-spec handling of "ESCAPE NULL".
                               1110                 :                :  */
                               1111                 :                : Datum
 2381 tgl@sss.pgh.pa.us        1112                 :UBC           0 : similar_escape(PG_FUNCTION_ARGS)
                               1113                 :                : {
                               1114                 :                :     text       *pat_text;
                               1115                 :                :     text       *esc_text;
                               1116                 :                :     text       *result;
                               1117                 :                : 
                               1118                 :                :     /* This function is not strict, so must test explicitly */
                               1119         [ #  # ]:              0 :     if (PG_ARGISNULL(0))
                               1120                 :              0 :         PG_RETURN_NULL();
                               1121                 :              0 :     pat_text = PG_GETARG_TEXT_PP(0);
                               1122                 :                : 
                               1123         [ #  # ]:              0 :     if (PG_ARGISNULL(1))
                               1124                 :              0 :         esc_text = NULL;        /* use default escape character */
                               1125                 :                :     else
                               1126                 :              0 :         esc_text = PG_GETARG_TEXT_PP(1);
                               1127                 :                : 
                               1128                 :              0 :     result = similar_escape_internal(pat_text, esc_text);
                               1129                 :                : 
 8575                          1130                 :              0 :     PG_RETURN_TEXT_P(result);
                               1131                 :                : }
                               1132                 :                : 
                               1133                 :                : /*
                               1134                 :                :  * regexp_count()
                               1135                 :                :  *      Return the number of matches of a pattern within a string.
                               1136                 :                :  */
                               1137                 :                : Datum
 1685 tgl@sss.pgh.pa.us        1138                 :CBC          24 : regexp_count(PG_FUNCTION_ARGS)
                               1139                 :                : {
                               1140                 :             24 :     text       *str = PG_GETARG_TEXT_PP(0);
                               1141                 :             24 :     text       *pattern = PG_GETARG_TEXT_PP(1);
                               1142                 :             24 :     int         start = 1;
                               1143         [ +  + ]:             24 :     text       *flags = PG_GETARG_TEXT_PP_IF_EXISTS(3);
                               1144                 :                :     pg_re_flags re_flags;
                               1145                 :                :     regexp_matches_ctx *matchctx;
                               1146                 :                : 
                               1147                 :                :     /* Collect optional parameters */
                               1148         [ +  + ]:             24 :     if (PG_NARGS() > 2)
                               1149                 :                :     {
                               1150                 :             21 :         start = PG_GETARG_INT32(2);
                               1151         [ +  + ]:             21 :         if (start <= 0)
                               1152         [ +  - ]:              6 :             ereport(ERROR,
                               1153                 :                :                     (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
                               1154                 :                :                      errmsg("invalid value for parameter \"%s\": %d",
                               1155                 :                :                             "start", start)));
                               1156                 :                :     }
                               1157                 :                : 
                               1158                 :                :     /* Determine options */
                               1159                 :             18 :     parse_re_flags(&re_flags, flags);
                               1160                 :                :     /* User mustn't specify 'g' */
                               1161         [ -  + ]:             18 :     if (re_flags.glob)
 1685 tgl@sss.pgh.pa.us        1162         [ #  # ]:UBC           0 :         ereport(ERROR,
                               1163                 :                :                 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
                               1164                 :                :         /* translator: %s is a SQL function name */
                               1165                 :                :                  errmsg("%s does not support the \"global\" option",
                               1166                 :                :                         "regexp_count()")));
                               1167                 :                :     /* But we find all the matches anyway */
 1685 tgl@sss.pgh.pa.us        1168                 :CBC          18 :     re_flags.glob = true;
                               1169                 :                : 
                               1170                 :                :     /* Do the matching */
                               1171                 :             18 :     matchctx = setup_regexp_matches(str, pattern, &re_flags, start - 1,
                               1172                 :                :                                     PG_GET_COLLATION(),
                               1173                 :                :                                     false,  /* can ignore subexprs */
                               1174                 :                :                                     false, false);
                               1175                 :                : 
                               1176                 :             18 :     PG_RETURN_INT32(matchctx->nmatches);
                               1177                 :                : }
                               1178                 :                : 
                               1179                 :                : /* This is separate to keep the opr_sanity regression test from complaining */
                               1180                 :                : Datum
                               1181                 :              3 : regexp_count_no_start(PG_FUNCTION_ARGS)
                               1182                 :                : {
                               1183                 :              3 :     return regexp_count(fcinfo);
                               1184                 :                : }
                               1185                 :                : 
                               1186                 :                : /*
                                                    :                :  * regexp_count_no_flags()
                                                    :                :  *      Forward the call, with fcinfo untouched, to regexp_count() and
                                                    :                :  *      return whatever Datum it produces.
                                                    :                :  *
                                                    :                :  * This is separate to keep the opr_sanity regression test from complaining.
                                                    :                :  * NOTE(review): by its name this serves calls that omit the "flags"
                                                    :                :  * argument -- confirm against the SQL-level declarations.
                                                    :                :  */
                               1187                 :                : Datum
                               1188                 :             15 : regexp_count_no_flags(PG_FUNCTION_ARGS)
                               1189                 :                : {
                               1190                 :             15 :     return regexp_count(fcinfo);
                               1191                 :                : }
                               1192                 :                : 
                               1193                 :                : /*
                               1194                 :                :  * regexp_instr()
                               1195                 :                :  *      Return the match's position within the string
                                                    :                :  *
                                                    :                :  * Arguments, by fcinfo index:
                                                    :                :  *   0  str        text to search
                                                    :                :  *   1  pattern    regular expression
                                                    :                :  *   2  start      optional, defaults to 1, must be > 0; the search is
                                                    :                :  *                 started at offset start - 1
                                                    :                :  *   3  n          optional, defaults to 1, must be > 0; which match
                                                    :                :  *                 to report
                                                    :                :  *   4  endoption  optional, defaults to 0, must be 0 or 1
                                                    :                :  *   5  flags      optional; handed to parse_re_flags(), and the
                                                    :                :  *                 "global" option is rejected
                                                    :                :  *   6  subexpr    optional, defaults to 0, must be >= 0
                                                    :                :  *
                                                    :                :  * Every rejected value is reported with ERRCODE_INVALID_PARAMETER_VALUE.
                                                    :                :  * The checks run in the order start, n, endoption, subexpr, flags, so
                                                    :                :  * that order also decides which error is raised first.
                                                    :                :  *
                                                    :                :  * Returns 0 when n is larger than the number of matches found, when
                                                    :                :  * subexpr is larger than matchctx->npatterns, or when the selected
                                                    :                :  * match_locs[] entry is negative; otherwise returns that entry plus one.
                               1196                 :                :  */
                               1197                 :                : Datum
                               1198                 :             78 : regexp_instr(PG_FUNCTION_ARGS)
                               1199                 :                : {
                               1200                 :             78 :     text       *str = PG_GETARG_TEXT_PP(0);
                               1201                 :             78 :     text       *pattern = PG_GETARG_TEXT_PP(1);
                               1202                 :             78 :     int         start = 1;
                               1203                 :             78 :     int         n = 1;
                               1204                 :             78 :     int         endoption = 0;
                               1205         [ +  + ]:             78 :     text       *flags = PG_GETARG_TEXT_PP_IF_EXISTS(5);
                               1206                 :             78 :     int         subexpr = 0;
                               1207                 :                :     int         pos;
                               1208                 :                :     pg_re_flags re_flags;
                               1209                 :                :     regexp_matches_ctx *matchctx;
                               1210                 :                : 
                               1211                 :                :     /*
                                                    :                :      * Collect optional parameters.  Each one is fetched only when
                                                    :                :      * PG_NARGS() shows it was passed; otherwise the initializer given
                                                    :                :      * in the declarations above stays in effect.
                                                    :                :      */
                               1212         [ +  + ]:             78 :     if (PG_NARGS() > 2)
                               1213                 :                :     {
                               1214                 :             69 :         start = PG_GETARG_INT32(2);
                               1215         [ +  + ]:             69 :         if (start <= 0)
                               1216         [ +  - ]:              3 :             ereport(ERROR,
                               1217                 :                :                     (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
                               1218                 :                :                      errmsg("invalid value for parameter \"%s\": %d",
                               1219                 :                :                             "start", start)));
                               1220                 :                :     }
                               1221         [ +  + ]:             75 :     if (PG_NARGS() > 3)
                               1222                 :                :     {
                               1223                 :             63 :         n = PG_GETARG_INT32(3);
                               1224         [ +  + ]:             63 :         if (n <= 0)
                               1225         [ +  - ]:              3 :             ereport(ERROR,
                               1226                 :                :                     (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
                               1227                 :                :                      errmsg("invalid value for parameter \"%s\": %d",
                               1228                 :                :                             "n", n)));
                               1229                 :                :     }
                               1230         [ +  + ]:             72 :     if (PG_NARGS() > 4)
                               1231                 :                :     {
                               1232                 :             54 :         endoption = PG_GETARG_INT32(4);
                               1233   [ +  +  +  + ]:             54 :         if (endoption != 0 && endoption != 1)
                               1234         [ +  - ]:              6 :             ereport(ERROR,
                               1235                 :                :                     (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
                               1236                 :                :                      errmsg("invalid value for parameter \"%s\": %d",
                               1237                 :                :                             "endoption", endoption)));
                               1238                 :                :     }
                               1239         [ +  + ]:             66 :     if (PG_NARGS() > 6)
                               1240                 :                :     {
                               1241                 :             42 :         subexpr = PG_GETARG_INT32(6);
                               1242         [ +  + ]:             42 :         if (subexpr < 0)
                               1243         [ +  - ]:              3 :             ereport(ERROR,
                               1244                 :                :                     (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
                               1245                 :                :                      errmsg("invalid value for parameter \"%s\": %d",
                               1246                 :                :                             "subexpr", subexpr)));
                               1247                 :                :     }
                               1248                 :                : 
                               1249                 :                :     /* Determine options */
                               1250                 :             63 :     parse_re_flags(&re_flags, flags);
                               1251                 :                :     /* User mustn't specify 'g' */
                               1252         [ +  + ]:             63 :     if (re_flags.glob)
                               1253         [ +  - ]:              3 :         ereport(ERROR,
                               1254                 :                :                 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
                               1255                 :                :         /* translator: %s is a SQL function name */
                               1256                 :                :                  errmsg("%s does not support the \"global\" option",
                               1257                 :                :                         "regexp_instr()")));
                               1258                 :                :     /*
                                                    :                :      * But we find all the matches anyway, so that the n'th one can be
                                                    :                :      * picked out of matchctx below.
                                                    :                :      */
                               1259                 :             60 :     re_flags.glob = true;
                               1260                 :                : 
                               1261                 :                :     /* Do the matching */
                               1262                 :             60 :     matchctx = setup_regexp_matches(str, pattern, &re_flags, start - 1,
                               1263                 :                :                                     PG_GET_COLLATION(),
                               1264                 :                :                                     (subexpr > 0),   /* need submatches? */
                               1265                 :                :                                     false, false);
                               1266                 :                : 
                               1267                 :                :     /* When n exceeds matches return 0 (includes case of no matches) */
                               1268         [ +  + ]:             60 :     if (n > matchctx->nmatches)
                               1269                 :              6 :         PG_RETURN_INT32(0);
                               1270                 :                : 
                               1271                 :                :     /* When subexpr exceeds number of subexpressions return 0 */
                               1272         [ +  + ]:             54 :     if (subexpr > matchctx->npatterns)
                               1273                 :              6 :         PG_RETURN_INT32(0);
                               1274                 :                : 
                               1275                 :                :     /*
                                                    :                :      * Select the appropriate match position to return.  match_locs[]
                                                    :                :      * is indexed here as two consecutive entries per pattern, with
                                                    :                :      * npatterns patterns per match; endoption == 1 picks the second
                                                    :                :      * entry of the pair.
                                                    :                :      * NOTE(review): presumably the pair holds the start and end
                                                    :                :      * offsets of the match -- confirm in setup_regexp_matches().
                                                    :                :      */
                               1276                 :             48 :     pos = (n - 1) * matchctx->npatterns;
                               1277         [ +  + ]:             48 :     if (subexpr > 0)
                               1278                 :             27 :         pos += subexpr - 1;
                               1279                 :             48 :     pos *= 2;
                               1280         [ +  + ]:             48 :     if (endoption == 1)
                               1281                 :             15 :         pos += 1;
                               1282                 :                : 
                               1283         [ +  + ]:             48 :     if (matchctx->match_locs[pos] >= 0)
                               1284                 :             45 :         PG_RETURN_INT32(matchctx->match_locs[pos] + 1);
                               1285                 :                :     else
                               1286                 :              3 :         PG_RETURN_INT32(0);     /* position not identifiable */
                               1287                 :                : }
                               1288                 :                : 
                               1289                 :                : /*
                                                    :                :  * regexp_instr_no_start()
                                                    :                :  *      Forward the call, with fcinfo untouched, to regexp_instr(), which
                                                    :                :  *      inspects PG_NARGS() and keeps its built-in defaults for trailing
                                                    :                :  *      arguments that were not passed.
                                                    :                :  *
                                                    :                :  * This is separate to keep the opr_sanity regression test from complaining.
                                                    :                :  * NOTE(review): by its name this serves calls that stop before the
                                                    :                :  * "start" argument -- confirm against the SQL-level declarations.
                                                    :                :  */
                               1290                 :                : Datum
                               1291                 :              9 : regexp_instr_no_start(PG_FUNCTION_ARGS)
                               1292                 :                : {
                               1293                 :              9 :     return regexp_instr(fcinfo);
                               1294                 :                : }
                               1295                 :                : 
                               1296                 :                : /*
                                                    :                :  * regexp_instr_no_n()
                                                    :                :  *      Forward the call, with fcinfo untouched, to regexp_instr(), which
                                                    :                :  *      inspects PG_NARGS() and keeps its built-in defaults for trailing
                                                    :                :  *      arguments that were not passed.
                                                    :                :  *
                                                    :                :  * This is separate to keep the opr_sanity regression test from complaining.
                                                    :                :  * NOTE(review): by its name this serves calls that stop before the "n"
                                                    :                :  * argument -- confirm against the SQL-level declarations.
                                                    :                :  */
                               1297                 :                : Datum
                               1298                 :              3 : regexp_instr_no_n(PG_FUNCTION_ARGS)
                               1299                 :                : {
                               1300                 :              3 :     return regexp_instr(fcinfo);
                               1301                 :                : }
                               1302                 :                : 
                               1303                 :                : /*
                                                    :                :  * regexp_instr_no_endoption()
                                                    :                :  *      Forward the call, with fcinfo untouched, to regexp_instr(), which
                                                    :                :  *      inspects PG_NARGS() and keeps its built-in defaults for trailing
                                                    :                :  *      arguments that were not passed.
                                                    :                :  *
                                                    :                :  * This is separate to keep the opr_sanity regression test from complaining.
                                                    :                :  * NOTE(review): by its name this serves calls that stop before the
                                                    :                :  * "endoption" argument -- confirm against the SQL-level declarations.
                                                    :                :  */
                               1304                 :                : Datum
                               1305                 :             12 : regexp_instr_no_endoption(PG_FUNCTION_ARGS)
                               1306                 :                : {
                               1307                 :             12 :     return regexp_instr(fcinfo);
                               1308                 :                : }
                               1309                 :                : 
                               1310                 :                : /*
                                                    :                :  * regexp_instr_no_flags()
                                                    :                :  *      Forward the call, with fcinfo untouched, to regexp_instr(), which
                                                    :                :  *      inspects PG_NARGS() and keeps its built-in defaults for trailing
                                                    :                :  *      arguments that were not passed.
                                                    :                :  *
                                                    :                :  * This is separate to keep the opr_sanity regression test from complaining.
                                                    :                :  * NOTE(review): by its name this serves calls that stop before the
                                                    :                :  * "flags" argument -- confirm against the SQL-level declarations.
                                                    :                :  */
                               1311                 :                : Datum
                               1312                 :              6 : regexp_instr_no_flags(PG_FUNCTION_ARGS)
                               1313                 :                : {
                               1314                 :              6 :     return regexp_instr(fcinfo);
                               1315                 :                : }
                               1316                 :                : 
                               1317                 :                : /*
                                                    :                :  * regexp_instr_no_subexpr()
                                                    :                :  *      Forward the call, with fcinfo untouched, to regexp_instr(), which
                                                    :                :  *      inspects PG_NARGS() and keeps its built-in defaults for trailing
                                                    :                :  *      arguments that were not passed.
                                                    :                :  *
                                                    :                :  * This is separate to keep the opr_sanity regression test from complaining.
                                                    :                :  * NOTE(review): by its name this serves calls that stop before the
                                                    :                :  * "subexpr" argument -- confirm against the SQL-level declarations.
                                                    :                :  */
                               1318                 :                : Datum
                               1319                 :              6 : regexp_instr_no_subexpr(PG_FUNCTION_ARGS)
                               1320                 :                : {
                               1321                 :              6 :     return regexp_instr(fcinfo);
                               1322                 :                : }
                               1323                 :                : 
                               1324                 :                : /*
                               1325                 :                :  * regexp_like()
                               1326                 :                :  *      Test for a pattern match within a string.
                                                    :                :  *
                                                    :                :  * Arguments, by fcinfo index:
                                                    :                :  *   0  str      text to test
                                                    :                :  *   1  pattern  regular expression
                                                    :                :  *   2  flags    optional; handed to parse_re_flags()
                                                    :                :  *
                                                    :                :  * The "global" option is rejected with ERRCODE_INVALID_PARAMETER_VALUE.
                                                    :                :  * Otherwise the boolean result of RE_compile_and_execute() is returned;
                                                    :                :  * it is called with the parsed cflags, the call's collation, and no
                                                    :                :  * match-position output (0, NULL).
                               1327                 :                :  */
                               1328                 :                : Datum
                               1329                 :             15 : regexp_like(PG_FUNCTION_ARGS)
                               1330                 :                : {
                               1331                 :             15 :     text       *str = PG_GETARG_TEXT_PP(0);
                               1332                 :             15 :     text       *pattern = PG_GETARG_TEXT_PP(1);
                               1333         [ +  + ]:             15 :     text       *flags = PG_GETARG_TEXT_PP_IF_EXISTS(2);
                               1334                 :                :     pg_re_flags re_flags;
                               1335                 :                : 
                               1336                 :                :     /* Determine options */
                               1337                 :             15 :     parse_re_flags(&re_flags, flags);
                               1338                 :                :     /* User mustn't specify 'g' */
                               1339         [ +  + ]:             15 :     if (re_flags.glob)
                               1340         [ +  - ]:              3 :         ereport(ERROR,
                               1341                 :                :                 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
                               1342                 :                :         /* translator: %s is a SQL function name */
                               1343                 :                :                  errmsg("%s does not support the \"global\" option",
                               1344                 :                :                         "regexp_like()")));
                               1345                 :                : 
                               1346                 :                :     /* Otherwise it's like textregexeq/texticregexeq */
                               1347   [ -  +  -  -  :             12 :     PG_RETURN_BOOL(RE_compile_and_execute(pattern,
                                     -  -  -  -  -  
                                           +  -  + ]
                               1348                 :                :                                           VARDATA_ANY(str),
                               1349                 :                :                                           VARSIZE_ANY_EXHDR(str),
                               1350                 :                :                                           re_flags.cflags,
                               1351                 :                :                                           PG_GET_COLLATION(),
                               1352                 :                :                                           0, NULL));
                               1353                 :                : }
                               1354                 :                : 
                               1355                 :                : /*
                                                    :                :  * regexp_like_no_flags()
                                                    :                :  *      Forward the call, with fcinfo untouched, to regexp_like() and
                                                    :                :  *      return whatever Datum it produces.
                                                    :                :  *
                                                    :                :  * This is separate to keep the opr_sanity regression test from complaining.
                                                    :                :  * NOTE(review): by its name this serves calls that omit the "flags"
                                                    :                :  * argument -- confirm against the SQL-level declarations.
                                                    :                :  */
                               1356                 :                : Datum
                               1357                 :              3 : regexp_like_no_flags(PG_FUNCTION_ARGS)
                               1358                 :                : {
                               1359                 :              3 :     return regexp_like(fcinfo);
                               1360                 :                : }
                               1361                 :                : 
                               1362                 :                : /*
                               1363                 :                :  * regexp_match()
                               1364                 :                :  *      Return the first substring(s) matching a pattern within a string.
                                                    :                :  *
                                                    :                :  * Arguments, by fcinfo index:
                                                    :                :  *   0  orig_str  text to search
                                                    :                :  *   1  pattern   regular expression
                                                    :                :  *   2  flags     optional; handed to parse_re_flags()
                                                    :                :  *
                                                    :                :  * The "global" option is rejected with ERRCODE_INVALID_PARAMETER_VALUE
                                                    :                :  * and a hint pointing at regexp_matches.
                                                    :                :  *
                                                    :                :  * Matching is done by setup_regexp_matches() starting at offset 0 with
                                                    :                :  * use_subpatterns set.  Returns SQL NULL when nothing matched; otherwise
                                                    :                :  * returns the value built by build_regexp_match_result().
                               1365                 :                :  */
                               1366                 :                : Datum
 3497                          1367                 :           1270 : regexp_match(PG_FUNCTION_ARGS)
                               1368                 :                : {
                               1369                 :           1270 :     text       *orig_str = PG_GETARG_TEXT_PP(0);
                               1370                 :           1270 :     text       *pattern = PG_GETARG_TEXT_PP(1);
                               1371         [ +  + ]:           1270 :     text       *flags = PG_GETARG_TEXT_PP_IF_EXISTS(2);
                               1372                 :                :     pg_re_flags re_flags;
                               1373                 :                :     regexp_matches_ctx *matchctx;
                               1374                 :                : 
                               1375                 :                :     /* Determine options */
                               1376                 :           1270 :     parse_re_flags(&re_flags, flags);
                               1377                 :                :     /* User mustn't specify 'g' */
                               1378         [ +  + ]:           1270 :     if (re_flags.glob)
                               1379         [ +  - ]:              4 :         ereport(ERROR,
                               1380                 :                :                 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
                               1381                 :                :         /* translator: %s is a SQL function name */
                               1382                 :                :                  errmsg("%s does not support the \"global\" option",
                               1383                 :                :                         "regexp_match()"),
                               1384                 :                :                  errhint("Use the regexp_matches function instead.")));
                               1385                 :                : 
 1685                          1386                 :           1266 :     matchctx = setup_regexp_matches(orig_str, pattern, &re_flags, 0,
                               1387                 :                :                                     PG_GET_COLLATION(), true, false, false);
                               1388                 :                : 
 3497                          1389         [ +  + ]:           1266 :     if (matchctx->nmatches == 0)
                               1390                 :             65 :         PG_RETURN_NULL();
                               1391                 :                : 
                               1392         [ -  + ]:           1201 :     Assert(matchctx->nmatches == 1);
                               1393                 :                : 
                               1394                 :                :     /*
                                                    :                :      * Create workspace that build_regexp_match_result needs: one Datum
                                                    :                :      * and one bool per pattern.
                                                    :                :      */
   95 michael@paquier.xyz      1395                 :GNC        1201 :     matchctx->elems = palloc_array(Datum, matchctx->npatterns);
                               1396                 :           1201 :     matchctx->nulls = palloc_array(bool, matchctx->npatterns);
                               1397                 :                : 
 3497 tgl@sss.pgh.pa.us        1398                 :CBC        1201 :     PG_RETURN_DATUM(PointerGetDatum(build_regexp_match_result(matchctx)));
                               1399                 :                : }
                               1400                 :                : 
                               1401                 :                : /*
                                                    :                :  * regexp_match_no_flags()
                                                    :                :  *      Forward the call, with fcinfo untouched, to regexp_match() and
                                                    :                :  *      return whatever Datum it produces.
                                                    :                :  *
                                                    :                :  * This is separate to keep the opr_sanity regression test from complaining.
                                                    :                :  * NOTE(review): by its name this serves calls that omit the "flags"
                                                    :                :  * argument -- confirm against the SQL-level declarations.
                                                    :                :  */
                               1402                 :                : Datum
                               1403                 :           1255 : regexp_match_no_flags(PG_FUNCTION_ARGS)
                               1404                 :                : {
                               1405                 :           1255 :     return regexp_match(fcinfo);
                               1406                 :                : }
                               1407                 :                : 
                               1408                 :                : /*
                               1409                 :                :  * regexp_matches()
                               1410                 :                :  *      Return a table of all matches of a pattern within a string.
                                                    :                :  *
                                                    :                :  * Arguments, by fcinfo index:
                                                    :                :  *   0  input string (copied on the first call)
                                                    :                :  *   1  pattern
                                                    :                :  *   2  flags, optional; handed to parse_re_flags()
                                                    :                :  *
                                                    :                :  * Written as a set-returning function.  On the first call the flags are
                                                    :                :  * parsed, setup_regexp_matches() does the matching, the elems/nulls
                                                    :                :  * workspace is allocated, and the resulting context is saved in
                                                    :                :  * funcctx->user_fctx; this happens with funcctx->multi_call_memory_ctx
                                                    :                :  * as the current memory context.  On each call, while next_match is
                                                    :                :  * below nmatches, the array built by build_regexp_match_result() is
                                                    :                :  * returned and next_match is advanced; after that the set is reported
                                                    :                :  * as done.  Unlike regexp_match(), the parsed flags are not checked for
                                                    :                :  * the "global" option here.
                               1411                 :                :  */
                               1412                 :                : Datum
 6935 neilc@samurai.com        1413                 :           1305 : regexp_matches(PG_FUNCTION_ARGS)
                               1414                 :                : {
                               1415                 :                :     FuncCallContext *funcctx;
                               1416                 :                :     regexp_matches_ctx *matchctx;
                               1417                 :                : 
                               1418         [ +  + ]:           1305 :     if (SRF_IS_FIRSTCALL())
                               1419                 :                :     {
 6695 bruce@momjian.us         1420                 :            975 :         text       *pattern = PG_GETARG_TEXT_PP(1);
                               1421         [ +  + ]:            975 :         text       *flags = PG_GETARG_TEXT_PP_IF_EXISTS(2);
                               1422                 :                :         pg_re_flags re_flags;
                               1423                 :                :         MemoryContext oldcontext;
                               1424                 :                : 
 6935 neilc@samurai.com        1425                 :            975 :         funcctx = SRF_FIRSTCALL_INIT();
                               1426                 :            975 :         oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
                               1427                 :                : 
                               1428                 :                :         /* Determine options */
 3497 tgl@sss.pgh.pa.us        1429                 :            975 :         parse_re_flags(&re_flags, flags);
                               1430                 :                : 
                               1431                 :                :         /* be sure to copy the input string into the multi-call ctx */
 6927 neilc@samurai.com        1432                 :            972 :         matchctx = setup_regexp_matches(PG_GETARG_TEXT_P_COPY(0), pattern,
                               1433                 :                :                                         &re_flags, 0,
                               1434                 :                :                                         PG_GET_COLLATION(),
                               1435                 :                :                                         true, false, false);
                               1436                 :                : 
                               1437                 :                :         /*
                                                    :                :          * Pre-create workspace that build_regexp_match_result needs:
                                                    :                :          * one Datum and one bool per pattern, allocated once here
                                                    :                :          * rather than on every call.
                                                    :                :          */
   95 michael@paquier.xyz      1438                 :GNC         966 :         matchctx->elems = palloc_array(Datum, matchctx->npatterns);
                               1439                 :            966 :         matchctx->nulls = palloc_array(bool, matchctx->npatterns);
                               1440                 :                : 
 6935 neilc@samurai.com        1441                 :CBC         966 :         MemoryContextSwitchTo(oldcontext);
  472 peter@eisentraut.org     1442                 :            966 :         funcctx->user_fctx = matchctx;
                               1443                 :                :     }
                               1444                 :                : 
 6935 neilc@samurai.com        1445                 :           1296 :     funcctx = SRF_PERCALL_SETUP();
                               1446                 :           1296 :     matchctx = (regexp_matches_ctx *) funcctx->user_fctx;
                               1447                 :                : 
 6791 tgl@sss.pgh.pa.us        1448         [ +  + ]:           1296 :     if (matchctx->next_match < matchctx->nmatches)
                               1449                 :                :     {
                               1450                 :                :         ArrayType  *result_ary;
                               1451                 :                : 
 3497                          1452                 :            330 :         result_ary = build_regexp_match_result(matchctx);
 6791                          1453                 :            330 :         matchctx->next_match++;
                               1454                 :            330 :         SRF_RETURN_NEXT(funcctx, PointerGetDatum(result_ary));
                               1455                 :                :     }
                               1456                 :                : 
 6935 neilc@samurai.com        1457                 :            966 :     SRF_RETURN_DONE(funcctx);
                               1458                 :                : }
                               1459                 :                : 
                               1460                 :                : /*
                                                    :                :  * regexp_matches_no_flags()
                                                    :                :  *      Forward the call, with fcinfo untouched, to regexp_matches() and
                                                    :                :  *      return whatever Datum it produces.
                                                    :                :  *
                                                    :                :  * This is separate to keep the opr_sanity regression test from complaining.
                                                    :                :  * NOTE(review): by its name this serves calls that omit the "flags"
                                                    :                :  * argument -- confirm against the SQL-level declarations.
                                                    :                :  */
                               1461                 :                : Datum
                               1462                 :           1143 : regexp_matches_no_flags(PG_FUNCTION_ARGS)
                               1463                 :                : {
                               1464                 :           1143 :     return regexp_matches(fcinfo);
                               1465                 :                : }
                               1466                 :                : 
                               1467                 :                : /*
                               1468                 :                :  * setup_regexp_matches --- do the initial matching for regexp_match,
                               1469                 :                :  *      regexp_split, and related functions
                               1470                 :                :  *
                               1471                 :                :  * To avoid having to re-find the compiled pattern on each call, we do
                               1472                 :                :  * all the matching in one swoop.  The returned regexp_matches_ctx contains
                               1473                 :                :  * the locations of all the substrings matching the pattern.
                               1474                 :                :  *
                               1475                 :                :  * start_search: the character (not byte) offset in orig_str at which to
                               1476                 :                :  * begin the search.  Returned positions are relative to orig_str anyway.
                               1477                 :                :  * use_subpatterns: collect data about matches to parenthesized subexpressions.
                               1478                 :                :  * ignore_degenerate: ignore zero-length matches.
                               1479                 :                :  * fetching_unmatched: caller wants to fetch unmatched substrings.
                               1480                 :                :  *
                               1481                 :                :  * We don't currently assume that fetching_unmatched is exclusive of fetching
                               1482                 :                :  * the matched text too; if it's set, the conversion buffer is large enough to
                               1483                 :                :  * fetch any single matched or unmatched string, but not any larger
                               1484                 :                :  * substring.  (In practice, when splitting the matches are usually small
                               1485                 :                :  * anyway, and it didn't seem worth complicating the code further.)
                               1486                 :                :  */
                               1487                 :                : static regexp_matches_ctx *
 3497 tgl@sss.pgh.pa.us        1488                 :         102694 : setup_regexp_matches(text *orig_str, text *pattern, pg_re_flags *re_flags,
                               1489                 :                :                      int start_search,
                               1490                 :                :                      Oid collation,
                               1491                 :                :                      bool use_subpatterns,
                               1492                 :                :                      bool ignore_degenerate,
                               1493                 :                :                      bool fetching_unmatched)
                               1494                 :                : {
   95 michael@paquier.xyz      1495                 :GNC      102694 :     regexp_matches_ctx *matchctx = palloc0_object(regexp_matches_ctx);
 2756 rhodiumtoad@postgres     1496                 :CBC      102694 :     int         eml = pg_database_encoding_max_length();
                               1497                 :                :     int         orig_len;
                               1498                 :                :     pg_wchar   *wide_str;
                               1499                 :                :     int         wide_len;
                               1500                 :                :     int         cflags;
                               1501                 :                :     regex_t    *cpattern;
                               1502                 :                :     regmatch_t *pmatch;
                               1503                 :                :     int         pmatch_len;
                               1504                 :                :     int         array_len;
                               1505                 :                :     int         array_idx;
                               1506                 :                :     int         prev_match_end;
                               1507                 :                :     int         prev_valid_match_end;
                               1508                 :         102694 :     int         maxlen = 0;     /* largest fetch length in characters */
                               1509                 :                : 
                               1510                 :                :     /* save original string --- we'll extract result substrings from it */
 6935 neilc@samurai.com        1511                 :         102694 :     matchctx->orig_str = orig_str;
                               1512                 :                : 
                               1513                 :                :     /* convert string to pg_wchar form for matching */
 6750 tgl@sss.pgh.pa.us        1514   [ -  +  -  -  :         102694 :     orig_len = VARSIZE_ANY_EXHDR(orig_str);
                                     -  -  -  -  +  
                                                 + ]
   95 michael@paquier.xyz      1515                 :GNC      102694 :     wide_str = palloc_array(pg_wchar, orig_len + 1);
 6750 tgl@sss.pgh.pa.us        1516         [ +  + ]:CBC      102694 :     wide_len = pg_mb2wchar_with_len(VARDATA_ANY(orig_str), wide_str, orig_len);
                               1517                 :                : 
                               1518                 :                :     /* set up the compiled pattern */
 1679                          1519                 :         102694 :     cflags = re_flags->cflags;
                               1520         [ +  + ]:         102694 :     if (!use_subpatterns)
                               1521                 :         100405 :         cflags |= REG_NOSUB;
                               1522                 :         102694 :     cpattern = RE_compile_and_cache(pattern, cflags, collation);
                               1523                 :                : 
                               1524                 :                :     /* do we want to remember subpatterns? */
 6791                          1525   [ +  +  +  + ]:         102688 :     if (use_subpatterns && cpattern->re_nsub > 0)
                               1526                 :                :     {
                               1527                 :           1347 :         matchctx->npatterns = cpattern->re_nsub;
                               1528                 :           1347 :         pmatch_len = cpattern->re_nsub + 1;
                               1529                 :                :     }
                               1530                 :                :     else
                               1531                 :                :     {
                               1532                 :         101341 :         use_subpatterns = false;
                               1533                 :         101341 :         matchctx->npatterns = 1;
                               1534                 :         101341 :         pmatch_len = 1;
                               1535                 :                :     }
                               1536                 :                : 
                               1537                 :                :     /* temporary output space for RE package */
   95 michael@paquier.xyz      1538                 :GNC      102688 :     pmatch = palloc_array(regmatch_t, pmatch_len);
                               1539                 :                : 
                               1540                 :                :     /*
                               1541                 :                :      * the real output space (grown dynamically if needed)
                               1542                 :                :      *
                               1543                 :                :      * use values 2^n-1, not 2^n, so that we hit the limit at 2^28-1 rather
                               1544                 :                :      * than at 2^27
                               1545                 :                :      */
 2756 rhodiumtoad@postgres     1546         [ +  + ]:CBC      102688 :     array_len = re_flags->glob ? 255 : 31;
   95 michael@paquier.xyz      1547                 :GNC      102688 :     matchctx->match_locs = palloc_array(int, array_len);
 6791 tgl@sss.pgh.pa.us        1548                 :CBC      102688 :     array_idx = 0;
                               1549                 :                : 
                               1550                 :                :     /* search for the pattern, perhaps repeatedly */
                               1551                 :         102688 :     prev_match_end = 0;
 2741 rhodiumtoad@postgres     1552                 :         102688 :     prev_valid_match_end = 0;
 6791 tgl@sss.pgh.pa.us        1553         [ +  + ]:         548712 :     while (RE_wchar_execute(cpattern, wide_str, wide_len, start_search,
                               1554                 :                :                             pmatch_len, pmatch))
                               1555                 :                :     {
                               1556                 :                :         /*
                               1557                 :                :          * If requested, ignore degenerate matches, which are zero-length
                               1558                 :                :          * matches occurring at the start or end of a string or just after a
                               1559                 :                :          * previous match.
                               1560                 :                :          */
                               1561         [ +  + ]:         447478 :         if (!ignore_degenerate ||
                               1562         [ +  + ]:         445749 :             (pmatch[0].rm_so < wide_len &&
                               1563         [ +  + ]:         445728 :              pmatch[0].rm_eo > prev_match_end))
                               1564                 :                :         {
                               1565                 :                :             /* enlarge output space if needed */
 2756 rhodiumtoad@postgres     1566         [ +  + ]:         447568 :             while (array_idx + matchctx->npatterns * 2 + 1 > array_len)
                               1567                 :                :             {
 2489 tgl@sss.pgh.pa.us        1568                 :            180 :                 array_len += array_len + 1; /* 2^n-1 => 2^(n+1)-1 */
                               1569         [ -  + ]:            180 :                 if (array_len > MaxAllocSize / sizeof(int))
 2756 rhodiumtoad@postgres     1570         [ #  # ]:UBC           0 :                     ereport(ERROR,
                               1571                 :                :                             (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
                               1572                 :                :                              errmsg("too many regular expression matches")));
 6791 tgl@sss.pgh.pa.us        1573                 :CBC         180 :                 matchctx->match_locs = (int *) repalloc(matchctx->match_locs,
                               1574                 :                :                                                         sizeof(int) * array_len);
                               1575                 :                :             }
                               1576                 :                : 
                               1577                 :                :             /* save this match's locations */
                               1578         [ +  + ]:         447388 :             if (use_subpatterns)
                               1579                 :                :             {
                               1580                 :                :                 int         i;
                               1581                 :                : 
                               1582         [ +  + ]:           3972 :                 for (i = 1; i <= matchctx->npatterns; i++)
                               1583                 :                :                 {
 2489                          1584                 :           2681 :                     int         so = pmatch[i].rm_so;
                               1585                 :           2681 :                     int         eo = pmatch[i].rm_eo;
                               1586                 :                : 
 2756 rhodiumtoad@postgres     1587                 :           2681 :                     matchctx->match_locs[array_idx++] = so;
                               1588                 :           2681 :                     matchctx->match_locs[array_idx++] = eo;
                               1589   [ +  +  +  -  :           2681 :                     if (so >= 0 && eo >= 0 && (eo - so) > maxlen)
                                              +  + ]
                               1590                 :           1722 :                         maxlen = (eo - so);
                               1591                 :                :                 }
                               1592                 :                :             }
                               1593                 :                :             else
                               1594                 :                :             {
 2489 tgl@sss.pgh.pa.us        1595                 :         446097 :                 int         so = pmatch[0].rm_so;
                               1596                 :         446097 :                 int         eo = pmatch[0].rm_eo;
                               1597                 :                : 
 2756 rhodiumtoad@postgres     1598                 :         446097 :                 matchctx->match_locs[array_idx++] = so;
                               1599                 :         446097 :                 matchctx->match_locs[array_idx++] = eo;
                               1600   [ +  -  +  -  :         446097 :                 if (so >= 0 && eo >= 0 && (eo - so) > maxlen)
                                              +  + ]
                               1601                 :         100562 :                     maxlen = (eo - so);
                               1602                 :                :             }
 6791 tgl@sss.pgh.pa.us        1603                 :         447388 :             matchctx->nmatches++;
                               1604                 :                : 
                               1605                 :                :             /*
                               1606                 :                :              * check length of unmatched portion between end of previous valid
                               1607                 :                :              * (nondegenerate, or degenerate but not ignored) match and start
                               1608                 :                :              * of current one
                               1609                 :                :              */
 2756 rhodiumtoad@postgres     1610         [ +  + ]:         447388 :             if (fetching_unmatched &&
                               1611         [ +  - ]:         445659 :                 pmatch[0].rm_so >= 0 &&
 2741                          1612         [ +  + ]:         445659 :                 (pmatch[0].rm_so - prev_valid_match_end) > maxlen)
                               1613                 :         190564 :                 maxlen = (pmatch[0].rm_so - prev_valid_match_end);
                               1614                 :         447388 :             prev_valid_match_end = pmatch[0].rm_eo;
                               1615                 :                :         }
 6791 tgl@sss.pgh.pa.us        1616                 :         447478 :         prev_match_end = pmatch[0].rm_eo;
                               1617                 :                : 
                               1618                 :                :         /* if not glob, stop after one match */
 3497                          1619         [ +  + ]:         447478 :         if (!re_flags->glob)
 6791                          1620                 :           1421 :             break;
                               1621                 :                : 
                               1622                 :                :         /*
                               1623                 :                :          * Advance search position.  Normally we start the next search at the
                               1624                 :                :          * end of the previous match; but if the match was of zero length, we
                               1625                 :                :          * have to advance by one character, or we'd just find the same match
                               1626                 :                :          * again.
                               1627                 :                :          */
 4610                          1628                 :         446057 :         start_search = prev_match_end;
                               1629         [ +  + ]:         446057 :         if (pmatch[0].rm_so == pmatch[0].rm_eo)
 6791                          1630                 :            588 :             start_search++;
                               1631         [ +  + ]:         446057 :         if (start_search > wide_len)
                               1632                 :             33 :             break;
                               1633                 :                :     }
                               1634                 :                : 
                               1635                 :                :     /*
                               1636                 :                :      * check length of unmatched portion between end of last match and end of
                               1637                 :                :      * input string
                               1638                 :                :      */
 2756 rhodiumtoad@postgres     1639         [ +  + ]:         102688 :     if (fetching_unmatched &&
 2741                          1640         [ +  + ]:         100336 :         (wide_len - prev_valid_match_end) > maxlen)
                               1641                 :             96 :         maxlen = (wide_len - prev_valid_match_end);
                               1642                 :                : 
                               1643                 :                :     /*
                               1644                 :                :      * Keep a note of the end position of the string for the benefit of
                               1645                 :                :      * splitting code.
                               1646                 :                :      */
 2756                          1647                 :         102688 :     matchctx->match_locs[array_idx] = wide_len;
                               1648                 :                : 
                               1649         [ +  - ]:         102688 :     if (eml > 1)
                               1650                 :                :     {
                               1651                 :         102688 :         int64       maxsiz = eml * (int64) maxlen;
                               1652                 :                :         int         conv_bufsiz;
                               1653                 :                : 
                               1654                 :                :         /*
                               1655                 :                :          * Make the conversion buffer large enough for any substring of
                               1656                 :                :          * interest.
                               1657                 :                :          *
                               1658                 :                :          * Worst case: assume we need the maximum size (maxlen*eml), but take
                               1659                 :                :          * advantage of the fact that the original string length in bytes is
                               1660                 :                :          * an upper bound on the byte length of any fetched substring (and we
                               1661                 :                :          * know that len+1 is safe to allocate because the varlena header is
                               1662                 :                :          * longer than 1 byte).
                               1663                 :                :          */
                               1664         [ +  + ]:         102688 :         if (maxsiz > orig_len)
                               1665                 :         100517 :             conv_bufsiz = orig_len + 1;
                               1666                 :                :         else
                               1667                 :           2171 :             conv_bufsiz = maxsiz + 1;   /* safe since maxsiz < 2^30 */
                               1668                 :                : 
                               1669                 :         102688 :         matchctx->conv_buf = palloc(conv_bufsiz);
                               1670                 :         102688 :         matchctx->conv_bufsiz = conv_bufsiz;
                               1671                 :         102688 :         matchctx->wide_str = wide_str;
                               1672                 :                :     }
                               1673                 :                :     else
                               1674                 :                :     {
                               1675                 :                :         /* No need to keep the wide string if we're in a single-byte charset. */
 2756 rhodiumtoad@postgres     1676                 :UBC           0 :         pfree(wide_str);
                               1677                 :              0 :         matchctx->wide_str = NULL;
                               1678                 :              0 :         matchctx->conv_buf = NULL;
                               1679                 :              0 :         matchctx->conv_bufsiz = 0;
                               1680                 :                :     }
                               1681                 :                : 
                               1682                 :                :     /* Clean up temp storage */
 6791 tgl@sss.pgh.pa.us        1683                 :CBC      102688 :     pfree(pmatch);
                               1684                 :                : 
                               1685                 :         102688 :     return matchctx;
                               1686                 :                : }
                               1687                 :                : 
                               1688                 :                : /*
                               1689                 :                :  * build_regexp_match_result - build output array for current match
                               1690                 :                :  */
                               1691                 :                : static ArrayType *
 3497                          1692                 :           1531 : build_regexp_match_result(regexp_matches_ctx *matchctx)
                               1693                 :                : {
 2756 rhodiumtoad@postgres     1694                 :           1531 :     char       *buf = matchctx->conv_buf;
 6791 tgl@sss.pgh.pa.us        1695                 :           1531 :     Datum      *elems = matchctx->elems;
                               1696                 :           1531 :     bool       *nulls = matchctx->nulls;
                               1697                 :                :     int         dims[1];
                               1698                 :                :     int         lbs[1];
                               1699                 :                :     int         loc;
                               1700                 :                :     int         i;
                               1701                 :                : 
                               1702                 :                :     /* Extract matching substrings from the original string */
                               1703                 :           1531 :     loc = matchctx->next_match * matchctx->npatterns * 2;
                               1704         [ +  + ]:           4317 :     for (i = 0; i < matchctx->npatterns; i++)
                               1705                 :                :     {
 6695 bruce@momjian.us         1706                 :           2786 :         int         so = matchctx->match_locs[loc++];
                               1707                 :           2786 :         int         eo = matchctx->match_locs[loc++];
                               1708                 :                : 
 6791 tgl@sss.pgh.pa.us        1709   [ +  +  -  + ]:           2786 :         if (so < 0 || eo < 0)
                               1710                 :                :         {
                               1711                 :              3 :             elems[i] = (Datum) 0;
                               1712                 :              3 :             nulls[i] = true;
                               1713                 :                :         }
 2756 rhodiumtoad@postgres     1714         [ +  - ]:           2783 :         else if (buf)
                               1715                 :                :         {
 2489 tgl@sss.pgh.pa.us        1716                 :           2783 :             int         len = pg_wchar2mb_with_len(matchctx->wide_str + so,
                               1717                 :                :                                                    buf,
                               1718                 :                :                                                    eo - so);
                               1719                 :                : 
 2250                          1720         [ -  + ]:           2783 :             Assert(len < matchctx->conv_bufsiz);
 2756 rhodiumtoad@postgres     1721                 :           2783 :             elems[i] = PointerGetDatum(cstring_to_text_with_len(buf, len));
                               1722                 :           2783 :             nulls[i] = false;
                               1723                 :                :         }
                               1724                 :                :         else
                               1725                 :                :         {
 6791 tgl@sss.pgh.pa.us        1726                 :UBC           0 :             elems[i] = DirectFunctionCall3(text_substr,
                               1727                 :                :                                            PointerGetDatum(matchctx->orig_str),
                               1728                 :                :                                            Int32GetDatum(so + 1),
                               1729                 :                :                                            Int32GetDatum(eo - so));
                               1730                 :              0 :             nulls[i] = false;
                               1731                 :                :         }
                               1732                 :                :     }
                               1733                 :                : 
                               1734                 :                :     /* And form an array */
 6791 tgl@sss.pgh.pa.us        1735                 :CBC        1531 :     dims[0] = matchctx->npatterns;
                               1736                 :           1531 :     lbs[0] = 1;
                               1737                 :                :     /* XXX: this hardcodes assumptions about the text type */
                               1738                 :           1531 :     return construct_md_array(elems, nulls, 1, dims, lbs,
                               1739                 :                :                               TEXTOID, -1, false, TYPALIGN_INT);
                               1740                 :                : }
                               1741                 :                : 
                               1742                 :                : /*
                               1743                 :                :  * regexp_split_to_table()
                               1744                 :                :  *      Split the string at matches of the pattern, returning the
                               1745                 :                :  *      split-out substrings as a table.
                               1746                 :                :  */
                               1747                 :                : Datum
 6935 neilc@samurai.com        1748                 :            311 : regexp_split_to_table(PG_FUNCTION_ARGS)
                               1749                 :                : {
                               1750                 :                :     FuncCallContext *funcctx;
                               1751                 :                :     regexp_matches_ctx *splitctx;
                               1752                 :                : 
                               1753         [ +  + ]:            311 :     if (SRF_IS_FIRSTCALL())
                               1754                 :                :     {
 6695 bruce@momjian.us         1755                 :             26 :         text       *pattern = PG_GETARG_TEXT_PP(1);
                               1756         [ +  + ]:             26 :         text       *flags = PG_GETARG_TEXT_PP_IF_EXISTS(2);
                               1757                 :                :         pg_re_flags re_flags;
                               1758                 :                :         MemoryContext oldcontext;
                               1759                 :                : 
 6935 neilc@samurai.com        1760                 :             26 :         funcctx = SRF_FIRSTCALL_INIT();
                               1761                 :             26 :         oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
                               1762                 :                : 
                               1763                 :                :         /* Determine options */
 3497 tgl@sss.pgh.pa.us        1764                 :             26 :         parse_re_flags(&re_flags, flags);
                               1765                 :                :         /* User mustn't specify 'g' */
                               1766         [ +  + ]:             23 :         if (re_flags.glob)
                               1767         [ +  - ]:              3 :             ereport(ERROR,
                               1768                 :                :                     (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
                               1769                 :                :             /* translator: %s is a SQL function name */
                               1770                 :                :                      errmsg("%s does not support the \"global\" option",
                               1771                 :                :                             "regexp_split_to_table()")));
                               1772                 :                :         /* But we find all the matches anyway */
                               1773                 :             20 :         re_flags.glob = true;
                               1774                 :                : 
                               1775                 :                :         /* be sure to copy the input string into the multi-call ctx */
 6791                          1776                 :             20 :         splitctx = setup_regexp_matches(PG_GETARG_TEXT_P_COPY(0), pattern,
                               1777                 :                :                                         &re_flags, 0,
                               1778                 :                :                                         PG_GET_COLLATION(),
                               1779                 :                :                                         false, true, true);
                               1780                 :                : 
 6935 neilc@samurai.com        1781                 :             20 :         MemoryContextSwitchTo(oldcontext);
  472 peter@eisentraut.org     1782                 :             20 :         funcctx->user_fctx = splitctx;
                               1783                 :                :     }
                               1784                 :                : 
 6935 neilc@samurai.com        1785                 :            305 :     funcctx = SRF_PERCALL_SETUP();
 6791 tgl@sss.pgh.pa.us        1786                 :            305 :     splitctx = (regexp_matches_ctx *) funcctx->user_fctx;
                               1787                 :                : 
                               1788         [ +  + ]:            305 :     if (splitctx->next_match <= splitctx->nmatches)
                               1789                 :                :     {
 6695 bruce@momjian.us         1790                 :            285 :         Datum       result = build_regexp_split_result(splitctx);
                               1791                 :                : 
 6791 tgl@sss.pgh.pa.us        1792                 :            285 :         splitctx->next_match++;
                               1793                 :            285 :         SRF_RETURN_NEXT(funcctx, result);
                               1794                 :                :     }
                               1795                 :                : 
                               1796                 :             20 :     SRF_RETURN_DONE(funcctx);
                               1797                 :                : }
                               1798                 :                : 
                               1799                 :                : /* This is separate to keep the opr_sanity regression test from complaining */
                               1800                 :                : Datum
 6695 bruce@momjian.us         1801                 :            276 : regexp_split_to_table_no_flags(PG_FUNCTION_ARGS)
                               1802                 :                : {
 6935 neilc@samurai.com        1803                 :            276 :     return regexp_split_to_table(fcinfo);
                               1804                 :                : }
                               1805                 :                : 
                               1806                 :                : /*
                               1807                 :                :  * regexp_split_to_array()
                               1808                 :                :  *      Split the string at matches of the pattern, returning the
                               1809                 :                :  *      split-out substrings as an array.
                               1810                 :                :  */
                               1811                 :                : Datum
 6695 bruce@momjian.us         1812                 :         100322 : regexp_split_to_array(PG_FUNCTION_ARGS)
                               1813                 :                : {
                               1814                 :         100322 :     ArrayBuildState *astate = NULL;
                               1815                 :                :     pg_re_flags re_flags;
                               1816                 :                :     regexp_matches_ctx *splitctx;
                               1817                 :                : 
                               1818                 :                :     /* Determine options */
 3497 tgl@sss.pgh.pa.us        1819         [ +  + ]:         100322 :     parse_re_flags(&re_flags, PG_GETARG_TEXT_PP_IF_EXISTS(2));
                               1820                 :                :     /* User mustn't specify 'g' */
                               1821         [ +  + ]:         100319 :     if (re_flags.glob)
                               1822         [ +  - ]:              3 :         ereport(ERROR,
                               1823                 :                :                 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
                               1824                 :                :         /* translator: %s is a SQL function name */
                               1825                 :                :                  errmsg("%s does not support the \"global\" option",
                               1826                 :                :                         "regexp_split_to_array()")));
                               1827                 :                :     /* But we find all the matches anyway */
                               1828                 :         100316 :     re_flags.glob = true;
                               1829                 :                : 
 6750                          1830                 :         100316 :     splitctx = setup_regexp_matches(PG_GETARG_TEXT_PP(0),
                               1831                 :         100316 :                                     PG_GETARG_TEXT_PP(1),
                               1832                 :                :                                     &re_flags, 0,
                               1833                 :                :                                     PG_GET_COLLATION(),
                               1834                 :                :                                     false, true, true);
                               1835                 :                : 
 6791                          1836         [ +  + ]:         646026 :     while (splitctx->next_match <= splitctx->nmatches)
                               1837                 :                :     {
 6935 neilc@samurai.com        1838                 :         545710 :         astate = accumArrayResult(astate,
                               1839                 :                :                                   build_regexp_split_result(splitctx),
                               1840                 :                :                                   false,
                               1841                 :                :                                   TEXTOID,
                               1842                 :                :                                   CurrentMemoryContext);
 6791 tgl@sss.pgh.pa.us        1843                 :         545710 :         splitctx->next_match++;
                               1844                 :                :     }
                               1845                 :                : 
 1295 peter@eisentraut.org     1846                 :         100316 :     PG_RETURN_DATUM(makeArrayResult(astate, CurrentMemoryContext));
                               1847                 :                : }
                               1848                 :                : 
                               1849                 :                : /* This is separate to keep the opr_sanity regression test from complaining */
                               1850                 :                : Datum
 6695 bruce@momjian.us         1851                 :         100301 : regexp_split_to_array_no_flags(PG_FUNCTION_ARGS)
                               1852                 :                : {
 6935 neilc@samurai.com        1853                 :         100301 :     return regexp_split_to_array(fcinfo);
                               1854                 :                : }
                               1855                 :                : 
                               1856                 :                : /*
                               1857                 :                :  * build_regexp_split_result - build output string for current match
                               1858                 :                :  *
                               1859                 :                :  * We return the string between the current match and the previous one,
                               1860                 :                :  * or the string after the last match when next_match == nmatches.
                               1861                 :                :  */
                               1862                 :                : static Datum
 6695 bruce@momjian.us         1863                 :         545995 : build_regexp_split_result(regexp_matches_ctx *splitctx)
                               1864                 :                : {
 2756 rhodiumtoad@postgres     1865                 :         545995 :     char       *buf = splitctx->conv_buf;
                               1866                 :                :     int         startpos;
                               1867                 :                :     int         endpos;
                               1868                 :                : 
 6791 tgl@sss.pgh.pa.us        1869         [ +  + ]:         545995 :     if (splitctx->next_match > 0)
                               1870                 :         445659 :         startpos = splitctx->match_locs[splitctx->next_match * 2 - 1];
                               1871                 :                :     else
                               1872                 :         100336 :         startpos = 0;
                               1873         [ -  + ]:         545995 :     if (startpos < 0)
 6791 tgl@sss.pgh.pa.us        1874         [ #  # ]:UBC           0 :         elog(ERROR, "invalid match ending position");
                               1875                 :                : 
 2250 tgl@sss.pgh.pa.us        1876                 :CBC      545995 :     endpos = splitctx->match_locs[splitctx->next_match * 2];
                               1877         [ -  + ]:         545995 :     if (endpos < startpos)
 2250 tgl@sss.pgh.pa.us        1878         [ #  # ]:UBC           0 :         elog(ERROR, "invalid match starting position");
                               1879                 :                : 
 2756 rhodiumtoad@postgres     1880         [ +  - ]:CBC      545995 :     if (buf)
                               1881                 :                :     {
                               1882                 :                :         int         len;
                               1883                 :                : 
                               1884                 :         545995 :         len = pg_wchar2mb_with_len(splitctx->wide_str + startpos,
                               1885                 :                :                                    buf,
                               1886                 :                :                                    endpos - startpos);
 2250 tgl@sss.pgh.pa.us        1887         [ -  + ]:         545995 :         Assert(len < splitctx->conv_bufsiz);
 2756 rhodiumtoad@postgres     1888                 :         545995 :         return PointerGetDatum(cstring_to_text_with_len(buf, len));
                               1889                 :                :     }
                               1890                 :                :     else
                               1891                 :                :     {
 2756 rhodiumtoad@postgres     1892                 :UBC           0 :         return DirectFunctionCall3(text_substr,
                               1893                 :                :                                    PointerGetDatum(splitctx->orig_str),
                               1894                 :                :                                    Int32GetDatum(startpos + 1),
                               1895                 :                :                                    Int32GetDatum(endpos - startpos));
                               1896                 :                :     }
                               1897                 :                : }
                               1898                 :                : 
                               1899                 :                : /*
                               1900                 :                :  * regexp_substr()
                               1901                 :                :  *      Return the substring that matches a regular expression pattern
                               1902                 :                :  */
                               1903                 :                : Datum
 1685 tgl@sss.pgh.pa.us        1904                 :CBC          54 : regexp_substr(PG_FUNCTION_ARGS)
                               1905                 :                : {
                               1906                 :             54 :     text       *str = PG_GETARG_TEXT_PP(0);
                               1907                 :             54 :     text       *pattern = PG_GETARG_TEXT_PP(1);
                               1908                 :             54 :     int         start = 1;
                               1909                 :             54 :     int         n = 1;
                               1910         [ +  + ]:             54 :     text       *flags = PG_GETARG_TEXT_PP_IF_EXISTS(4);
                               1911                 :             54 :     int         subexpr = 0;
                               1912                 :                :     int         so,
                               1913                 :                :                 eo,
                               1914                 :                :                 pos;
                               1915                 :                :     pg_re_flags re_flags;
                               1916                 :                :     regexp_matches_ctx *matchctx;
                               1917                 :                : 
                               1918                 :                :     /* Collect optional parameters */
                               1919         [ +  + ]:             54 :     if (PG_NARGS() > 2)
                               1920                 :                :     {
                               1921                 :             45 :         start = PG_GETARG_INT32(2);
                               1922         [ +  + ]:             45 :         if (start <= 0)
                               1923         [ +  - ]:              3 :             ereport(ERROR,
                               1924                 :                :                     (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
                               1925                 :                :                      errmsg("invalid value for parameter \"%s\": %d",
                               1926                 :                :                             "start", start)));
                               1927                 :                :     }
                               1928         [ +  + ]:             51 :     if (PG_NARGS() > 3)
                               1929                 :                :     {
                               1930                 :             39 :         n = PG_GETARG_INT32(3);
                               1931         [ +  + ]:             39 :         if (n <= 0)
                               1932         [ +  - ]:              3 :             ereport(ERROR,
                               1933                 :                :                     (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
                               1934                 :                :                      errmsg("invalid value for parameter \"%s\": %d",
                               1935                 :                :                             "n", n)));
                               1936                 :                :     }
                               1937         [ +  + ]:             48 :     if (PG_NARGS() > 5)
                               1938                 :                :     {
                               1939                 :             24 :         subexpr = PG_GETARG_INT32(5);
                               1940         [ +  + ]:             24 :         if (subexpr < 0)
                               1941         [ +  - ]:              3 :             ereport(ERROR,
                               1942                 :                :                     (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
                               1943                 :                :                      errmsg("invalid value for parameter \"%s\": %d",
                               1944                 :                :                             "subexpr", subexpr)));
                               1945                 :                :     }
                               1946                 :                : 
                               1947                 :                :     /* Determine options */
                               1948                 :             45 :     parse_re_flags(&re_flags, flags);
                               1949                 :                :     /* User mustn't specify 'g' */
                               1950         [ +  + ]:             45 :     if (re_flags.glob)
                               1951         [ +  - ]:              3 :         ereport(ERROR,
                               1952                 :                :                 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
                               1953                 :                :         /* translator: %s is a SQL function name */
                               1954                 :                :                  errmsg("%s does not support the \"global\" option",
                               1955                 :                :                         "regexp_substr()")));
                               1956                 :                :     /* But we find all the matches anyway */
                               1957                 :             42 :     re_flags.glob = true;
                               1958                 :                : 
                               1959                 :                :     /* Do the matching */
                               1960                 :             42 :     matchctx = setup_regexp_matches(str, pattern, &re_flags, start - 1,
                               1961                 :                :                                     PG_GET_COLLATION(),
                               1962                 :                :                                     (subexpr > 0),   /* need submatches? */
                               1963                 :                :                                     false, false);
                               1964                 :                : 
                               1965                 :                :     /* When n exceeds matches return NULL (includes case of no matches) */
                               1966         [ +  + ]:             42 :     if (n > matchctx->nmatches)
                               1967                 :              6 :         PG_RETURN_NULL();
                               1968                 :                : 
                               1969                 :                :     /* When subexpr exceeds number of subexpressions return NULL */
                               1970         [ +  + ]:             36 :     if (subexpr > matchctx->npatterns)
                               1971                 :              3 :         PG_RETURN_NULL();
                               1972                 :                : 
                               1973                 :                :     /* Select the appropriate match position to return */
                               1974                 :             33 :     pos = (n - 1) * matchctx->npatterns;
                               1975         [ +  + ]:             33 :     if (subexpr > 0)
                               1976                 :             15 :         pos += subexpr - 1;
                               1977                 :             33 :     pos *= 2;
                               1978                 :             33 :     so = matchctx->match_locs[pos];
                               1979                 :             33 :     eo = matchctx->match_locs[pos + 1];
                               1980                 :                : 
                               1981   [ +  +  -  + ]:             33 :     if (so < 0 || eo < 0)
                               1982                 :              3 :         PG_RETURN_NULL();       /* unidentifiable location */
                               1983                 :                : 
                               1984                 :             30 :     PG_RETURN_DATUM(DirectFunctionCall3(text_substr,
                               1985                 :                :                                         PointerGetDatum(matchctx->orig_str),
                               1986                 :                :                                         Int32GetDatum(so + 1),
                               1987                 :                :                                         Int32GetDatum(eo - so)));
                               1988                 :                : }
                               1989                 :                : 
                               1990                 :                : /* This is separate to keep the opr_sanity regression test from complaining */
                               1991                 :                : Datum
                               1992                 :              9 : regexp_substr_no_start(PG_FUNCTION_ARGS)
                               1993                 :                : {
                               1994                 :              9 :     return regexp_substr(fcinfo);
                               1995                 :                : }
                               1996                 :                : 
                               1997                 :                : /* This is separate to keep the opr_sanity regression test from complaining */
                               1998                 :                : Datum
                               1999                 :              3 : regexp_substr_no_n(PG_FUNCTION_ARGS)
                               2000                 :                : {
                               2001                 :              3 :     return regexp_substr(fcinfo);
                               2002                 :                : }
                               2003                 :                : 
                               2004                 :                : /* This is separate to keep the opr_sanity regression test from complaining */
                               2005                 :                : Datum
                               2006                 :             12 : regexp_substr_no_flags(PG_FUNCTION_ARGS)
                               2007                 :                : {
                               2008                 :             12 :     return regexp_substr(fcinfo);
                               2009                 :                : }
                               2010                 :                : 
                               2011                 :                : /* This is separate to keep the opr_sanity regression test from complaining */
                               2012                 :                : Datum
                               2013                 :              6 : regexp_substr_no_subexpr(PG_FUNCTION_ARGS)
                               2014                 :                : {
                               2015                 :              6 :     return regexp_substr(fcinfo);
                               2016                 :                : }
                               2017                 :                : 
                               2018                 :                : /*
                               2019                 :                :  * regexp_fixed_prefix - extract fixed prefix, if any, for a regexp
                               2020                 :                :  *
                               2021                 :                :  * The result is NULL if there is no fixed prefix, else a palloc'd string.
                               2022                 :                :  * If it is an exact match, not just a prefix, *exact is returned as true.
                               2023                 :                :  */
                               2024                 :                : char *
 4996                          2025                 :           8541 : regexp_fixed_prefix(text *text_re, bool case_insensitive, Oid collation,
                               2026                 :                :                     bool *exact)
                               2027                 :                : {
                               2028                 :                :     char       *result;
                               2029                 :                :     regex_t    *re;
                               2030                 :                :     int         cflags;
                               2031                 :                :     int         re_result;
                               2032                 :                :     pg_wchar   *str;
                               2033                 :                :     size_t      slen;
                               2034                 :                :     size_t      maxlen;
                               2035                 :                :     char        errMsg[100];
                               2036                 :                : 
                               2037                 :           8541 :     *exact = false;             /* default result */
                               2038                 :                : 
                               2039                 :                :     /* Compile RE */
                               2040                 :           8541 :     cflags = REG_ADVANCED;
                               2041         [ +  + ]:           8541 :     if (case_insensitive)
                               2042                 :             37 :         cflags |= REG_ICASE;
                               2043                 :                : 
 1679                          2044                 :           8541 :     re = RE_compile_and_cache(text_re, cflags | REG_NOSUB, collation);
                               2045                 :                : 
                               2046                 :                :     /* Examine it to see if there's a fixed prefix */
 4996                          2047                 :           8529 :     re_result = pg_regprefix(re, &str, &slen);
                               2048                 :                : 
                               2049   [ +  +  +  - ]:           8529 :     switch (re_result)
                               2050                 :                :     {
                               2051                 :            389 :         case REG_NOMATCH:
                               2052                 :            389 :             return NULL;
                               2053                 :                : 
                               2054                 :           1575 :         case REG_PREFIX:
                               2055                 :                :             /* continue with wchar conversion */
                               2056                 :           1575 :             break;
                               2057                 :                : 
                               2058                 :           6565 :         case REG_EXACT:
                               2059                 :           6565 :             *exact = true;
                               2060                 :                :             /* continue with wchar conversion */
                               2061                 :           6565 :             break;
                               2062                 :                : 
 4996 tgl@sss.pgh.pa.us        2063                 :UBC           0 :         default:
                               2064                 :                :             /* re failed??? */
                               2065                 :              0 :             pg_regerror(re_result, re, errMsg, sizeof(errMsg));
                               2066         [ #  # ]:              0 :             ereport(ERROR,
                               2067                 :                :                     (errcode(ERRCODE_INVALID_REGULAR_EXPRESSION),
                               2068                 :                :                      errmsg("regular expression failed: %s", errMsg)));
                               2069                 :                :             break;
                               2070                 :                :     }
                               2071                 :                : 
                               2072                 :                :     /* Convert pg_wchar result back to database encoding */
 4996 tgl@sss.pgh.pa.us        2073                 :CBC        8140 :     maxlen = pg_database_encoding_max_length() * slen + 1;
                               2074                 :           8140 :     result = (char *) palloc(maxlen);
                               2075                 :           8140 :     slen = pg_wchar2mb_with_len(str, result, slen);
                               2076         [ -  + ]:           8140 :     Assert(slen < maxlen);
                               2077                 :                : 
 1072 tmunro@postgresql.or     2078                 :           8140 :     pfree(str);
                               2079                 :                : 
 4996 tgl@sss.pgh.pa.us        2080                 :           8140 :     return result;
                               2081                 :                : }

Generated by: LCOV version 2.4-beta