LCOV - 0e5ff9b9b45a657aea12440478dc002e9b01f138 vs 0123ce131fca454009439dfa3b2266d1d40737d7

LCOV - differential code coverage report

Current view:	top level - src/test/modules/test_regex - test_regex.c (source / functions)		Coverage	Total	Hit	UBC	GNC	CBC	EUB	ECB	DCB
Current:	0e5ff9b9b45a657aea12440478dc002e9b01f138 vs 0123ce131fca454009439dfa3b2266d1d40737d7	Lines:	86.4 %	309	267	42	6	261			8
Current Date:	2026-03-14 14:10:32 -0400	Functions:	100.0 %	9	9		2	7
Baseline:	lcov-20260315-024220-baseline	Branches:	71.9 %	139	100	39		100	27	9
Baseline Date:	2026-03-14 15:27:56 +0100	Line coverage date bins:
Legend:	Lines: hit not hit Branches: + taken - not taken # not executed	(30,360] days:	100.0 %	6	6		6
		(360..) days:	86.1 %	303	261	42		261
		Function coverage date bins:
		(360..) days:	100.0 %	9	9		2	7
		Branch coverage date bins:
		(360..) days:	57.1 %	175	100	39		100	27	9

 Age         Owner                    Branch data    TLA  Line data    Source code

                                  1                 :                : /*--------------------------------------------------------------------------
                                  2                 :                :  *
                                  3                 :                :  * test_regex.c
                                  4                 :                :  *      Test harness for the regular expression package.
                                  5                 :                :  *
                                  6                 :                :  * Portions Copyright (c) 1996-2026, PostgreSQL Global Development Group
                                  7                 :                :  * Portions Copyright (c) 1994, Regents of the University of California
                                  8                 :                :  *
                                  9                 :                :  * IDENTIFICATION
                                 10                 :                :  *      src/test/modules/test_regex/test_regex.c
                                 11                 :                :  *
                                 12                 :                :  * -------------------------------------------------------------------------
                                 13                 :                :  */
                                 14                 :                : 
                                 15                 :                : #include "postgres.h"
                                 16                 :                : 
                                 17                 :                : #include "funcapi.h"
                                 18                 :                : #include "regex/regex.h"
                                 19                 :                : #include "utils/array.h"
                                 20                 :                : #include "utils/builtins.h"
                                 21                 :                : 
 1894 tgl@sss.pgh.pa.us          22                 :CBC           2 : PG_MODULE_MAGIC;
                                 23                 :                : 
                                 24                 :                : 
                                 25                 :                : /* all the options of interest for regex functions */
                                 26                 :                : typedef struct test_re_flags
                                 27                 :                : {
                                 28                 :                :     int         cflags;         /* compile flags for Spencer's regex code */
                                 29                 :                :     int         eflags;         /* execute flags for Spencer's regex code */
                                 30                 :                :     long        info;           /* expected re_info bits */
                                 31                 :                :     bool        glob;           /* do it globally (for each occurrence) */
                                 32                 :                :     bool        indices;        /* report indices not actual strings */
                                 33                 :                :     bool        partial;        /* expect partial match */
                                 34                 :                : } test_re_flags;
                                 35                 :                : 
                                 36                 :                : /* cross-call state for test_regex() */
                                 37                 :                : typedef struct test_regex_ctx
                                 38                 :                : {
                                 39                 :                :     test_re_flags re_flags;     /* flags */
                                 40                 :                :     rm_detail_t details;        /* "details" from execution */
                                 41                 :                :     text       *orig_str;       /* data string in original TEXT form */
                                 42                 :                :     int         nmatches;       /* number of places where pattern matched */
                                 43                 :                :     int         npatterns;      /* number of capturing subpatterns */
                                 44                 :                :     /* We store start char index and end+1 char index for each match */
                                 45                 :                :     /* so the number of entries in match_locs is nmatches * npatterns * 2 */
                                 46                 :                :     int        *match_locs;     /* 0-based character indexes */
                                 47                 :                :     int         next_match;     /* 0-based index of next match to process */
                                 48                 :                :     /* workspace for build_test_match_result() */
                                 49                 :                :     Datum      *elems;          /* has npatterns+1 elements */
                                 50                 :                :     bool       *nulls;          /* has npatterns+1 elements */
                                 51                 :                :     pg_wchar   *wide_str;       /* wide-char version of original string */
                                 52                 :                :     char       *conv_buf;       /* conversion buffer, if needed */
                                 53                 :                :     int         conv_bufsiz;    /* size thereof */
                                 54                 :                : } test_regex_ctx;
                                 55                 :                : 
                                 56                 :                : /* Local functions */
                                 57                 :                : static void test_re_compile(text *text_re, int cflags, Oid collation,
                                 58                 :                :                             regex_t *result_re);
                                 59                 :                : static void parse_test_flags(test_re_flags *flags, text *opts);
                                 60                 :                : static test_regex_ctx *setup_test_matches(text *orig_str,
                                 61                 :                :                                           regex_t *cpattern,
                                 62                 :                :                                           test_re_flags *re_flags,
                                 63                 :                :                                           Oid collation,
                                 64                 :                :                                           bool use_subpatterns);
                                 65                 :                : static ArrayType *build_test_info_result(regex_t *cpattern,
                                 66                 :                :                                          test_re_flags *flags);
                                 67                 :                : static ArrayType *build_test_match_result(test_regex_ctx *matchctx);
                                 68                 :                : 
                                 69                 :                : 
                                 70                 :                : /*
                                 71                 :                :  * test_regex(pattern text, string text, flags text) returns setof text[]
                                 72                 :                :  *
                                 73                 :                :  * This is largely based on regexp.c's regexp_matches, with additions
                                 74                 :                :  * for debugging purposes.
                                 75                 :                :  */
                                 76                 :              3 : PG_FUNCTION_INFO_V1(test_regex);
                                 77                 :                : 
                                 78                 :                : Datum
                                 79                 :           1767 : test_regex(PG_FUNCTION_ARGS)
                                 80                 :                : {
                                 81                 :                :     FuncCallContext *funcctx;
                                 82                 :                :     test_regex_ctx *matchctx;
                                 83                 :                :     ArrayType  *result_ary;
                                 84                 :                : 
                                 85         [ +  + ]:           1767 :     if (SRF_IS_FIRSTCALL())
                                 86                 :                :     {
                                 87                 :            696 :         text       *pattern = PG_GETARG_TEXT_PP(0);
                                 88                 :            696 :         text       *flags = PG_GETARG_TEXT_PP(2);
                                 89                 :            696 :         Oid         collation = PG_GET_COLLATION();
                                 90                 :                :         test_re_flags re_flags;
                                 91                 :                :         regex_t     cpattern;
                                 92                 :                :         MemoryContext oldcontext;
                                 93                 :                : 
                                 94                 :            696 :         funcctx = SRF_FIRSTCALL_INIT();
                                 95                 :            696 :         oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
                                 96                 :                : 
                                 97                 :                :         /* Determine options */
                                 98                 :            696 :         parse_test_flags(&re_flags, flags);
                                 99                 :                : 
                                100                 :                :         /* set up the compiled pattern */
                                101                 :            696 :         test_re_compile(pattern, re_flags.cflags, collation, &cpattern);
                                102                 :                : 
                                103                 :                :         /* be sure to copy the input string into the multi-call ctx */
                                104                 :            590 :         matchctx = setup_test_matches(PG_GETARG_TEXT_P_COPY(1), &cpattern,
                                105                 :                :                                       &re_flags,
                                106                 :                :                                       collation,
                                107                 :                :                                       true);
                                108                 :                : 
                                109                 :                :         /* Pre-create workspace that build_test_match_result needs */
   96 michael@paquier.xyz       110                 :GNC         590 :         matchctx->elems = palloc_array(Datum, matchctx->npatterns + 1);
                                111                 :            590 :         matchctx->nulls = palloc_array(bool, matchctx->npatterns + 1);
                                112                 :                : 
 1894 tgl@sss.pgh.pa.us         113                 :CBC         590 :         MemoryContextSwitchTo(oldcontext);
  472 peter@eisentraut.org      114                 :            590 :         funcctx->user_fctx = matchctx;
                                115                 :                : 
                                116                 :                :         /*
                                117                 :                :          * Return the first result row, which is info equivalent to Tcl's
                                118                 :                :          * "regexp -about" output
                                119                 :                :          */
 1894 tgl@sss.pgh.pa.us         120                 :            590 :         result_ary = build_test_info_result(&cpattern, &re_flags);
                                121                 :                : 
                                122                 :            590 :         pg_regfree(&cpattern);
                                123                 :                : 
                                124                 :            590 :         SRF_RETURN_NEXT(funcctx, PointerGetDatum(result_ary));
                                125                 :                :     }
                                126                 :                :     else
                                127                 :                :     {
                                128                 :                :         /* Each subsequent row describes one match */
                                129                 :           1071 :         funcctx = SRF_PERCALL_SETUP();
                                130                 :           1071 :         matchctx = (test_regex_ctx *) funcctx->user_fctx;
                                131                 :                : 
                                132         [ +  + ]:           1071 :         if (matchctx->next_match < matchctx->nmatches)
                                133                 :                :         {
                                134                 :            481 :             result_ary = build_test_match_result(matchctx);
                                135                 :            481 :             matchctx->next_match++;
                                136                 :            481 :             SRF_RETURN_NEXT(funcctx, PointerGetDatum(result_ary));
                                137                 :                :         }
                                138                 :                :     }
                                139                 :                : 
                                140                 :            590 :     SRF_RETURN_DONE(funcctx);
                                141                 :                : }
                                142                 :                : 
                                143                 :                : 
                                144                 :                : /*
                                145                 :                :  * test_re_compile - compile a RE
                                146                 :                :  *
                                147                 :                :  *  text_re --- the pattern, expressed as a TEXT object
                                148                 :                :  *  cflags --- compile options for the pattern
                                149                 :                :  *  collation --- collation to use for LC_CTYPE-dependent behavior
                                150                 :                :  *  result_re --- output, compiled RE is stored here
                                151                 :                :  *
                                152                 :                :  * Pattern is given in the database encoding.  We internally convert to
                                153                 :                :  * an array of pg_wchar, which is what Spencer's regex package wants.
                                154                 :                :  *
                                155                 :                :  * Caller must eventually pg_regfree the resulting RE to avoid memory leaks.
                                156                 :                :  */
                                157                 :                : static void
                                158                 :            696 : test_re_compile(text *text_re, int cflags, Oid collation,
                                159                 :                :                 regex_t *result_re)
                                160                 :                : {
                                161   [ -  +  -  -  :            696 :     int         text_re_len = VARSIZE_ANY_EXHDR(text_re);
                                     -  -  -  -  -  
                                                 + ]
                                162         [ -  + ]:            696 :     char       *text_re_val = VARDATA_ANY(text_re);
                                163                 :                :     pg_wchar   *pattern;
                                164                 :                :     int         pattern_len;
                                165                 :                :     int         regcomp_result;
                                166                 :                :     char        errMsg[100];
                                167                 :                : 
                                168                 :                :     /* Convert pattern string to wide characters */
                                169                 :            696 :     pattern = (pg_wchar *) palloc((text_re_len + 1) * sizeof(pg_wchar));
                                170                 :            696 :     pattern_len = pg_mb2wchar_with_len(text_re_val,
                                171                 :                :                                        pattern,
                                172                 :                :                                        text_re_len);
                                173                 :                : 
                                174                 :            696 :     regcomp_result = pg_regcomp(result_re,
                                175                 :                :                                 pattern,
                                176                 :                :                                 pattern_len,
                                177                 :                :                                 cflags,
                                178                 :                :                                 collation);
                                179                 :                : 
                                180                 :            696 :     pfree(pattern);
                                181                 :                : 
                                182         [ +  + ]:            696 :     if (regcomp_result != REG_OKAY)
                                183                 :                :     {
                                184                 :                :         /* re didn't compile (no need for pg_regfree, if so) */
                                185                 :            106 :         pg_regerror(regcomp_result, result_re, errMsg, sizeof(errMsg));
                                186         [ +  - ]:            106 :         ereport(ERROR,
                                187                 :                :                 (errcode(ERRCODE_INVALID_REGULAR_EXPRESSION),
                                188                 :                :                  errmsg("invalid regular expression: %s", errMsg)));
                                189                 :                :     }
                                190                 :            590 : }
                                191                 :                : 
                                192                 :                : /*
                                193                 :                :  * test_re_execute - execute a RE on pg_wchar data
                                194                 :                :  *
                                195                 :                :  * Returns true on match, false on no match
                                196                 :                :  * Arguments are as for pg_regexec
                                197                 :                :  */
                                198                 :                : static bool
                                199                 :            590 : test_re_execute(regex_t *re, pg_wchar *data, int data_len,
                                200                 :                :                 int start_search,
                                201                 :                :                 rm_detail_t *details,
                                202                 :                :                 int nmatch, regmatch_t *pmatch,
                                203                 :                :                 int eflags)
                                204                 :                : {
                                205                 :                :     int         regexec_result;
                                206                 :                :     char        errMsg[100];
                                207                 :                : 
                                208                 :                :     /* Initialize match locations in case engine doesn't */
                                209                 :            590 :     details->rm_extend.rm_so = -1;
                                210                 :            590 :     details->rm_extend.rm_eo = -1;
                                211         [ +  + ]:           1466 :     for (int i = 0; i < nmatch; i++)
                                212                 :                :     {
                                213                 :            876 :         pmatch[i].rm_so = -1;
                                214                 :            876 :         pmatch[i].rm_eo = -1;
                                215                 :                :     }
                                216                 :                : 
                                217                 :                :     /* Perform RE match and return result */
                                218                 :            590 :     regexec_result = pg_regexec(re,
                                219                 :                :                                 data,
                                220                 :                :                                 data_len,
                                221                 :                :                                 start_search,
                                222                 :                :                                 details,
                                223                 :                :                                 nmatch,
                                224                 :                :                                 pmatch,
                                225                 :                :                                 eflags);
                                226                 :                : 
                                227   [ +  +  -  + ]:            590 :     if (regexec_result != REG_OKAY && regexec_result != REG_NOMATCH)
                                228                 :                :     {
                                229                 :                :         /* re failed??? */
 1894 tgl@sss.pgh.pa.us         230                 :UBC           0 :         pg_regerror(regexec_result, re, errMsg, sizeof(errMsg));
                                231         [ #  # ]:              0 :         ereport(ERROR,
                                232                 :                :                 (errcode(ERRCODE_INVALID_REGULAR_EXPRESSION),
                                233                 :                :                  errmsg("regular expression failed: %s", errMsg)));
                                234                 :                :     }
                                235                 :                : 
 1894 tgl@sss.pgh.pa.us         236                 :CBC         590 :     return (regexec_result == REG_OKAY);
                                237                 :                : }
                                238                 :                : 
                                239                 :                : 
                                240                 :                : /*
                                241                 :                :  * parse_test_flags - parse the flags argument
                                242                 :                :  *
                                243                 :                :  *  flags --- output argument, filled with desired options
                                244                 :                :  *  opts --- TEXT object, or NULL for defaults
                                245                 :                :  */
                                246                 :                : static void
                                247                 :            696 : parse_test_flags(test_re_flags *flags, text *opts)
                                248                 :                : {
                                249                 :                :     /* these defaults must match Tcl's */
                                250                 :            696 :     int         cflags = REG_ADVANCED;
                                251                 :            696 :     int         eflags = 0;
                                252                 :            696 :     long        info = 0;
                                253                 :                : 
                                254                 :            696 :     flags->glob = false;
                                255                 :            696 :     flags->indices = false;
                                256                 :            696 :     flags->partial = false;
                                257                 :                : 
                                258         [ +  - ]:            696 :     if (opts)
                                259                 :                :     {
                                260         [ -  + ]:            696 :         char       *opt_p = VARDATA_ANY(opts);
                                261   [ -  +  -  -  :            696 :         int         opt_len = VARSIZE_ANY_EXHDR(opts);
                                     -  -  -  -  -  
                                                 + ]
                                262                 :                :         int         i;
                                263                 :                : 
                                264         [ +  + ]:           1891 :         for (i = 0; i < opt_len; i++)
                                265                 :                :         {
                                266   [ +  +  +  +  :           1195 :             switch (opt_p[i])
                                     -  +  +  +  +  
                                     +  +  +  +  +  
                                     +  +  +  +  -  
                                     -  -  -  +  +  
                                     +  +  +  +  +  
                                     +  +  +  +  +  
                                     +  +  +  +  +  
                                              +  - ]
                                267                 :                :             {
                                268                 :             78 :                 case '-':
                                269                 :                :                     /* allowed, no-op */
                                270                 :             78 :                     break;
                                271                 :              7 :                 case '!':
                                272                 :              7 :                     flags->partial = true;
                                273                 :              7 :                     break;
                                274                 :              1 :                 case '*':
                                275                 :                :                     /* test requires Unicode --- ignored here */
                                276                 :              1 :                     break;
                                277                 :             53 :                 case '0':
                                278                 :             53 :                     flags->indices = true;
                                279                 :             53 :                     break;
                                280                 :                : 
                                281                 :                :                     /* These flags correspond to user-exposed RE options: */
 1894 tgl@sss.pgh.pa.us         282                 :UBC           0 :                 case 'g':       /* global match */
                                283                 :              0 :                     flags->glob = true;
                                284                 :              0 :                     break;
 1894 tgl@sss.pgh.pa.us         285                 :CBC          20 :                 case 'i':       /* case insensitive */
                                286                 :             20 :                     cflags |= REG_ICASE;
                                287                 :             20 :                     break;
                                288                 :             35 :                 case 'n':       /* \n affects ^ $ . [^ */
                                289                 :             35 :                     cflags |= REG_NEWLINE;
                                290                 :             35 :                     break;
                                291                 :              2 :                 case 'p':       /* ~Perl, \n affects . [^ */
                                292                 :              2 :                     cflags |= REG_NLSTOP;
                                293                 :              2 :                     cflags &= ~REG_NLANCH;
                                294                 :              2 :                     break;
                                295                 :              2 :                 case 'w':       /* weird, \n affects ^ $ only */
                                296                 :              2 :                     cflags &= ~REG_NLSTOP;
                                297                 :              2 :                     cflags |= REG_NLANCH;
                                298                 :              2 :                     break;
                                299                 :             14 :                 case 'x':       /* expanded syntax */
                                300                 :             14 :                     cflags |= REG_EXPANDED;
                                301                 :             14 :                     break;
                                302                 :                : 
                                303                 :                :                     /* These flags correspond to Tcl's -xflags options: */
                                304                 :              2 :                 case 'a':
                                305                 :              2 :                     cflags |= REG_ADVF;
                                306                 :              2 :                     break;
                                307                 :            131 :                 case 'b':
                                308                 :            131 :                     cflags &= ~REG_ADVANCED;
                                309                 :            131 :                     break;
                                310                 :             11 :                 case 'c':
                                311                 :                : 
                                312                 :                :                     /*
                                313                 :                :                      * Tcl calls this TCL_REG_CANMATCH, but it's really
                                314                 :                :                      * REG_EXPECT.  In this implementation we must also set
                                315                 :                :                      * the partial and indices flags, so that
                                316                 :                :                      * setup_test_matches and build_test_match_result will
                                317                 :                :                      * emit the desired data.  (They'll emit more fields than
                                318                 :                :                      * Tcl would, but that's fine.)
                                319                 :                :                      */
                                320                 :             11 :                     cflags |= REG_EXPECT;
                                321                 :             11 :                     flags->partial = true;
                                322                 :             11 :                     flags->indices = true;
                                323                 :             11 :                     break;
                                324                 :             10 :                 case 'e':
                                325                 :             10 :                     cflags &= ~REG_ADVANCED;
                                326                 :             10 :                     cflags |= REG_EXTENDED;
                                327                 :             10 :                     break;
                                328                 :              6 :                 case 'q':
                                329                 :              6 :                     cflags &= ~REG_ADVANCED;
                                330                 :              6 :                     cflags |= REG_QUOTE;
                                331                 :              6 :                     break;
                                332                 :              2 :                 case 'o':       /* o for opaque */
                                333                 :              2 :                     cflags |= REG_NOSUB;
                                334                 :              2 :                     break;
                                335                 :              2 :                 case 's':       /* s for start */
                                336                 :              2 :                     cflags |= REG_BOSONLY;
                                337                 :              2 :                     break;
                                338                 :              6 :                 case '+':
                                339                 :              6 :                     cflags |= REG_FAKE;
                                340                 :              6 :                     break;
 1894 tgl@sss.pgh.pa.us         341                 :UBC           0 :                 case ',':
                                342                 :              0 :                     cflags |= REG_PROGRESS;
                                343                 :              0 :                     break;
                                344                 :              0 :                 case '.':
                                345                 :              0 :                     cflags |= REG_DUMP;
                                346                 :              0 :                     break;
                                347                 :              0 :                 case ':':
                                348                 :              0 :                     eflags |= REG_MTRACE;
                                349                 :              0 :                     break;
                                350                 :              0 :                 case ';':
                                351                 :              0 :                     eflags |= REG_FTRACE;
                                352                 :              0 :                     break;
 1894 tgl@sss.pgh.pa.us         353                 :CBC           6 :                 case '^':
                                354                 :              6 :                     eflags |= REG_NOTBOL;
                                355                 :              6 :                     break;
                                356                 :              4 :                 case '$':
                                357                 :              4 :                     eflags |= REG_NOTEOL;
                                358                 :              4 :                     break;
                                359                 :             17 :                 case 't':
                                360                 :             17 :                     cflags |= REG_EXPECT;
                                361                 :             17 :                     break;
                                362                 :              5 :                 case '%':
                                363                 :              5 :                     eflags |= REG_SMALL;
                                364                 :              5 :                     break;
                                365                 :                : 
                                366                 :                :                     /* These flags define expected info bits: */
                                367                 :              5 :                 case 'A':
                                368                 :              5 :                     info |= REG_UBSALNUM;
                                369                 :              5 :                     break;
                                370                 :              4 :                 case 'B':
                                371                 :              4 :                     info |= REG_UBRACES;
                                372                 :              4 :                     break;
                                373                 :             42 :                 case 'E':
                                374                 :             42 :                     info |= REG_UBBS;
                                375                 :             42 :                     break;
                                376                 :             34 :                 case 'H':
                                377                 :             34 :                     info |= REG_ULOOKAROUND;
                                378                 :             34 :                     break;
                                379                 :             11 :                 case 'I':
                                380                 :             11 :                     info |= REG_UIMPOSSIBLE;
                                381                 :             11 :                     break;
                                382                 :            164 :                 case 'L':
                                383                 :            164 :                     info |= REG_ULOCALE;
                                384                 :            164 :                     break;
                                385                 :             43 :                 case 'M':
                                386                 :             43 :                     info |= REG_UUNPORT;
                                387                 :             43 :                     break;
                                388                 :             47 :                 case 'N':
                                389                 :             47 :                     info |= REG_UEMPTYMATCH;
                                390                 :             47 :                     break;
                                391                 :            307 :                 case 'P':
                                392                 :            307 :                     info |= REG_UNONPOSIX;
                                393                 :            307 :                     break;
                                394                 :             36 :                 case 'Q':
                                395                 :             36 :                     info |= REG_UBOUNDS;
                                396                 :             36 :                     break;
                                397                 :             42 :                 case 'R':
                                398                 :             42 :                     info |= REG_UBACKREF;
                                399                 :             42 :                     break;
                                400                 :             25 :                 case 'S':
                                401                 :             25 :                     info |= REG_UUNSPEC;
                                402                 :             25 :                     break;
                                403                 :             20 :                 case 'T':
                                404                 :             20 :                     info |= REG_USHORTEST;
                                405                 :             20 :                     break;
                                406                 :              1 :                 case 'U':
                                407                 :              1 :                     info |= REG_UPBOTCH;
                                408                 :              1 :                     break;
                                409                 :                : 
 1894 tgl@sss.pgh.pa.us         410                 :UBC           0 :                 default:
                                411         [ #  # ]:              0 :                     ereport(ERROR,
                                412                 :                :                             (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
                                413                 :                :                              errmsg("invalid regular expression test option: \"%.*s\"",
                                414                 :                :                                     pg_mblen_range(opt_p + i, opt_p + opt_len),
                                415                 :                :                                     opt_p + i)));
                                416                 :                :                     break;
                                417                 :                :             }
                                418                 :                :         }
                                419                 :                :     }
 1894 tgl@sss.pgh.pa.us         420                 :CBC         696 :     flags->cflags = cflags;
                                421                 :            696 :     flags->eflags = eflags;
                                422                 :            696 :     flags->info = info;
                                423                 :            696 : }
                                424                 :                : 
                                 425                 :                : /*
                                 426                 :                :  * setup_test_matches --- run the regex against orig_str and record matches
                                 427                 :                :  *
                                 428                 :                :  * To simplify memory management, we do all the matching in one swoop.
                                 429                 :                :  * The returned test_regex_ctx holds match_locs[], a flat array of (start, end)
                                 430                 :                :  * offsets into the pg_wchar string: npatterns pairs per recorded match.
                                 431                 :                :  */
                                 432                 :                : static test_regex_ctx *
                                 433                 :            590 : setup_test_matches(text *orig_str,  /* subject text; pointer is saved in the result */
                                 434                 :                :                    regex_t *cpattern, test_re_flags *re_flags,  /* compiled RE; flags get copied into the result */
                                 435                 :                :                    Oid collation,   /* not referenced in this function */
                                 436                 :                :                    bool use_subpatterns)    /* also record subexpression locations? */
                                 437                 :                : {
    96 michael@paquier.xyz       438                 :GNC         590 :     test_regex_ctx *matchctx = palloc0_object(test_regex_ctx);  /* zeroed result; nmatches is only incremented below */
  1894 tgl@sss.pgh.pa.us         439                 :CBC         590 :     int         eml = pg_database_encoding_max_length();    /* max bytes per character */
                                 440                 :                :     int         orig_len;   /* byte length of orig_str's data */
                                 441                 :                :     pg_wchar   *wide_str;   /* orig_str converted to pg_wchar form */
                                 442                 :                :     int         wide_len;   /* length of wide_str, as returned by the conversion */
                                 443                 :                :     regmatch_t *pmatch;     /* per-search output from the RE package */
                                 444                 :                :     int         pmatch_len; /* number of entries in pmatch[] */
                                 445                 :                :     int         array_len;  /* allocated size of match_locs[], in ints */
                                 446                 :                :     int         array_idx;  /* next unused slot in match_locs[] */
                                 447                 :                :     int         prev_match_end; /* rm_eo of the latest whole-pattern match */
                                 448                 :                :     int         start_search;   /* offset at which the next search begins */
                                 449                 :            590 :     int         maxlen = 0;     /* largest fetch length in characters */
                                 450                 :                : 
                                 451                 :                :     /* save flags */
                                 452                 :            590 :     matchctx->re_flags = *re_flags;
                                 453                 :                : 
                                 454                 :                :     /* save original string --- we'll extract result substrings from it */
                                 455                 :            590 :     matchctx->orig_str = orig_str;
                                 456                 :                : 
                                 457                 :                :     /* convert string to pg_wchar form for matching */
                                 458   [ -  +  -  -  :            590 :     orig_len = VARSIZE_ANY_EXHDR(orig_str);
                                      -  -  -  -  -  
                                                  + ]
    96 michael@paquier.xyz       459                 :GNC         590 :     wide_str = palloc_array(pg_wchar, orig_len + 1);    /* one pg_wchar per input byte, plus one (presumably for a terminator) */
  1894 tgl@sss.pgh.pa.us         460         [ -  + ]:CBC         590 :     wide_len = pg_mb2wchar_with_len(VARDATA_ANY(orig_str), wide_str, orig_len);
                                 461                 :                : 
                                 462                 :                :     /* do we want to remember subpatterns? */
                                 463   [ +  -  +  + ]:            590 :     if (use_subpatterns && cpattern->re_nsub > 0)
                                 464                 :                :     {
                                 465                 :            127 :         matchctx->npatterns = cpattern->re_nsub + 1;    /* whole match plus each subexpression */
                                 466                 :            127 :         pmatch_len = cpattern->re_nsub + 1;
                                 467                 :                :     }
                                 468                 :                :     else
                                 469                 :                :     {
                                 470                 :            463 :         use_subpatterns = false;
                                 471                 :            463 :         matchctx->npatterns = 1;    /* whole match only */
                                 472                 :            463 :         pmatch_len = 1;
                                 473                 :                :     }
                                 474                 :                : 
                                 475                 :                :     /* temporary output space for RE package */
    96 michael@paquier.xyz       476                 :GNC         590 :     pmatch = palloc_array(regmatch_t, pmatch_len);
                                 477                 :                : 
                                 478                 :                :     /*
                                 479                 :                :      * the real output space (grown dynamically if needed)
                                 480                 :                :      *
                                 481                 :                :      * use values 2^n-1, not 2^n, so that we hit the limit at 2^28-1 rather
                                 482                 :                :      * than at 2^27
                                 483                 :                :      */
  1894 tgl@sss.pgh.pa.us         484         [ -  + ]:CBC         590 :     array_len = re_flags->glob ? 255 : 31;
    96 michael@paquier.xyz       485                 :GNC         590 :     matchctx->match_locs = palloc_array(int, array_len);
  1894 tgl@sss.pgh.pa.us         486                 :CBC         590 :     array_idx = 0;
                                 487                 :                : 
                                 488                 :                :     /* search for the pattern, perhaps repeatedly */
                                 489                 :            590 :     prev_match_end = 0;
                                 490                 :            590 :     start_search = 0;
                                 491         [ +  + ]:            590 :     while (test_re_execute(cpattern, wide_str, wide_len,
                                 492                 :                :                            start_search,
                                 493                 :                :                            &matchctx->details,
                                 494                 :                :                            pmatch_len, pmatch,
                                 495                 :                :                            re_flags->eflags))
                                 496                 :                :     {
                                 497                 :                :         /* enlarge output space if needed (this match writes 2 ints per pattern) */
                                 498         [ -  + ]:            463 :         while (array_idx + matchctx->npatterns * 2 + 1 > array_len)
                                 499                 :                :         {
  1894 tgl@sss.pgh.pa.us         500                 :UBC           0 :             array_len += array_len + 1; /* 2^n-1 => 2^(n+1)-1 */
                                 501         [ #  # ]:              0 :             if (array_len > MaxAllocSize / sizeof(int))
                                 502         [ #  # ]:              0 :                 ereport(ERROR,
                                 503                 :                :                         (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
                                 504                 :                :                          errmsg("too many regular expression matches")));
                                 505                 :              0 :             matchctx->match_locs = (int *) repalloc(matchctx->match_locs,
                                 506                 :                :                                                     sizeof(int) * array_len);
                                 507                 :                :         }
                                 508                 :                : 
                                 509                 :                :         /* save this match's locations */
  1894 tgl@sss.pgh.pa.us         510         [ +  + ]:CBC        1094 :         for (int i = 0; i < matchctx->npatterns; i++)
                                 511                 :                :         {
                                 512                 :            631 :             int         so = pmatch[i].rm_so;
                                 513                 :            631 :             int         eo = pmatch[i].rm_eo;
                                 514                 :                : 
                                 515                 :            631 :             matchctx->match_locs[array_idx++] = so;
                                 516                 :            631 :             matchctx->match_locs[array_idx++] = eo;
                                 517   [ +  +  +  -  :            631 :             if (so >= 0 && eo >= 0 && (eo - so) > maxlen)
                                               +  + ]
                                 518                 :            438 :                 maxlen = (eo - so); /* remember the longest span seen so far */
                                 519                 :                :         }
                                 520                 :            463 :         matchctx->nmatches++;
                                 521                 :            463 :         prev_match_end = pmatch[0].rm_eo;   /* entry 0 is the whole-pattern match */
                                 522                 :                : 
                                 523                 :                :         /* if not glob, stop after one match */
                                 524         [ +  - ]:            463 :         if (!re_flags->glob)
                                 525                 :            463 :             break;
                                 526                 :                : 
                                 527                 :                :         /*
                                 528                 :                :          * Advance search position.  Normally we start the next search at the
                                 529                 :                :          * end of the previous match; but if the match was of zero length, we
                                 530                 :                :          * have to advance by one character, or we'd just find the same match
                                 531                 :                :          * again.
                                 532                 :                :          */
  1894 tgl@sss.pgh.pa.us         533                 :UBC           0 :         start_search = prev_match_end;
                                 534         [ #  # ]:              0 :         if (pmatch[0].rm_so == pmatch[0].rm_eo)
                                 535                 :              0 :             start_search++;
                                 536         [ #  # ]:              0 :         if (start_search > wide_len)    /* ran off the end of the string */
                                 537                 :              0 :             break;
                                 538                 :                :     }
                                 539                 :                : 
                                 540                 :                :     /*
                                 541                 :                :      * If we had no match, but "partial" and "indices" are set, emit the
                                 542                 :                :      * details.
                                 543                 :                :      */
  1894 tgl@sss.pgh.pa.us         544   [ +  +  +  +  :CBC         590 :     if (matchctx->nmatches == 0 && re_flags->partial && re_flags->indices)
                                               +  - ]
                                 545                 :                :     {
                                 546                 :                :         /* enlarge output space if needed (same growth rule as in the loop above) */
  1883                           547         [ -  + ]:             18 :         while (array_idx + matchctx->npatterns * 2 + 1 > array_len)
                                 548                 :                :         {
  1883 tgl@sss.pgh.pa.us         549                 :UBC           0 :             array_len += array_len + 1; /* 2^n-1 => 2^(n+1)-1 */
                                 550         [ #  # ]:              0 :             if (array_len > MaxAllocSize / sizeof(int))
                                 551         [ #  # ]:              0 :                 ereport(ERROR,
                                 552                 :                :                         (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
                                 553                 :                :                          errmsg("too many regular expression matches")));
                                 554                 :              0 :             matchctx->match_locs = (int *) repalloc(matchctx->match_locs,
                                 555                 :                :                                                     sizeof(int) * array_len);
                                 556                 :                :         }
                                 557                 :                : 
  1894 tgl@sss.pgh.pa.us         558                 :CBC          18 :         matchctx->match_locs[array_idx++] = matchctx->details.rm_extend.rm_so;  /* rm_extend goes in the whole-match slot */
                                 559                 :             18 :         matchctx->match_locs[array_idx++] = matchctx->details.rm_extend.rm_eo;
                                 560                 :                :         /* we don't have pmatch data, so emit -1 */
                                 561         [ +  + ]:             20 :         for (int i = 1; i < matchctx->npatterns; i++)
                                 562                 :                :         {
                                 563                 :              2 :             matchctx->match_locs[array_idx++] = -1;
                                 564                 :              2 :             matchctx->match_locs[array_idx++] = -1;
                                 565                 :                :         }
                                 566                 :             18 :         matchctx->nmatches++;   /* counted as one result entry */
                                 567                 :                :     }
                                 568                 :                : 
  1883                           569         [ -  + ]:            590 :     Assert(array_idx <= array_len); /* sanity check: never wrote past the allocation */
                                 570                 :                : 
  1894                           571         [ +  - ]:            590 :     if (eml > 1)    /* multibyte encoding */
                                 572                 :                :     {
                                 573                 :            590 :         int64       maxsiz = eml * (int64) maxlen;  /* worst-case byte size of the longest span */
                                 574                 :                :         int         conv_bufsiz;
                                 575                 :                : 
                                 576                 :                :         /*
                                 577                 :                :          * Make the conversion buffer large enough for any substring of
                                 578                 :                :          * interest.
                                 579                 :                :          *
                                 580                 :                :          * Worst case: assume we need the maximum size (maxlen*eml), but take
                                 581                 :                :          * advantage of the fact that the original string length in bytes is
                                 582                 :                :          * an upper bound on the byte length of any fetched substring (and we
                                 583                 :                :          * know that len+1 is safe to allocate because the varlena header is
                                 584                 :                :          * longer than 1 byte).
                                 585                 :                :          */
                                 586         [ +  + ]:            590 :         if (maxsiz > orig_len)
                                 587                 :            415 :             conv_bufsiz = orig_len + 1;
                                 588                 :                :         else
                                 589                 :            175 :             conv_bufsiz = maxsiz + 1;   /* safe since maxsiz < 2^30 */
                                 590                 :                : 
                                 591                 :            590 :         matchctx->conv_buf = palloc(conv_bufsiz);
                                 592                 :            590 :         matchctx->conv_bufsiz = conv_bufsiz;
                                 593                 :            590 :         matchctx->wide_str = wide_str;  /* ownership of the wide string passes to the context */
                                 594                 :                :     }
                                 595                 :                :     else
                                 596                 :                :     {
                                 597                 :                :         /* No need to keep the wide string if we're in a single-byte charset. */
  1894 tgl@sss.pgh.pa.us         598                 :UBC           0 :         pfree(wide_str);
                                 599                 :              0 :         matchctx->wide_str = NULL;
                                 600                 :              0 :         matchctx->conv_buf = NULL;
                                 601                 :              0 :         matchctx->conv_bufsiz = 0;
                                 602                 :                :     }
                                 603                 :                : 
                                 604                 :                :     /* Clean up temp storage */
  1894 tgl@sss.pgh.pa.us         605                 :CBC         590 :     pfree(pmatch);
                                 606                 :                : 
                                 607                 :            590 :     return matchctx;
                                 608                 :                : }
                                609                 :                : 
                                610                 :                : /*
                                611                 :                :  * build_test_info_result - build output array describing compiled regexp
                                612                 :                :  *
                                613                 :                :  * This borrows some code from Tcl's TclRegAbout().
                                614                 :                :  */
                                615                 :                : static ArrayType *
                                616                 :            590 : build_test_info_result(regex_t *cpattern, test_re_flags *flags)
                                617                 :                : {
                                618                 :                :     /* Translation data for flag bits in regex_t.re_info */
                                619                 :                :     struct infoname
                                620                 :                :     {
                                621                 :                :         int         bit;
                                622                 :                :         const char *text;
                                623                 :                :     };
                                624                 :                :     static const struct infoname infonames[] = {
                                625                 :                :         {REG_UBACKREF, "REG_UBACKREF"},
                                626                 :                :         {REG_ULOOKAROUND, "REG_ULOOKAROUND"},
                                627                 :                :         {REG_UBOUNDS, "REG_UBOUNDS"},
                                628                 :                :         {REG_UBRACES, "REG_UBRACES"},
                                629                 :                :         {REG_UBSALNUM, "REG_UBSALNUM"},
                                630                 :                :         {REG_UPBOTCH, "REG_UPBOTCH"},
                                631                 :                :         {REG_UBBS, "REG_UBBS"},
                                632                 :                :         {REG_UNONPOSIX, "REG_UNONPOSIX"},
                                633                 :                :         {REG_UUNSPEC, "REG_UUNSPEC"},
                                634                 :                :         {REG_UUNPORT, "REG_UUNPORT"},
                                635                 :                :         {REG_ULOCALE, "REG_ULOCALE"},
                                636                 :                :         {REG_UEMPTYMATCH, "REG_UEMPTYMATCH"},
                                637                 :                :         {REG_UIMPOSSIBLE, "REG_UIMPOSSIBLE"},
                                638                 :                :         {REG_USHORTEST, "REG_USHORTEST"},
                                639                 :                :         {0, NULL}
                                640                 :                :     };
                                641                 :                :     const struct infoname *inf;
                                642                 :                :     Datum       elems[lengthof(infonames) + 1];
                                643                 :            590 :     int         nresults = 0;
                                644                 :                :     char        buf[80];
                                645                 :                :     int         dims[1];
                                646                 :                :     int         lbs[1];
                                647                 :                : 
                                648                 :                :     /* Set up results: first, the number of subexpressions */
                                649                 :            590 :     snprintf(buf, sizeof(buf), "%d", (int) cpattern->re_nsub);
                                650                 :            590 :     elems[nresults++] = PointerGetDatum(cstring_to_text(buf));
                                651                 :                : 
                                652                 :                :     /* Report individual info bit states */
                                653         [ +  + ]:           8850 :     for (inf = infonames; inf->bit != 0; inf++)
                                654                 :                :     {
                                655         [ +  + ]:           8260 :         if (cpattern->re_info & inf->bit)
                                656                 :                :         {
                                657         [ +  - ]:            758 :             if (flags->info & inf->bit)
                                658                 :            758 :                 elems[nresults++] = PointerGetDatum(cstring_to_text(inf->text));
                                659                 :                :             else
                                660                 :                :             {
 1894 tgl@sss.pgh.pa.us         661                 :UBC           0 :                 snprintf(buf, sizeof(buf), "unexpected %s!", inf->text);
                                662                 :              0 :                 elems[nresults++] = PointerGetDatum(cstring_to_text(buf));
                                663                 :                :             }
                                664                 :                :         }
                                665                 :                :         else
                                666                 :                :         {
 1894 tgl@sss.pgh.pa.us         667         [ -  + ]:CBC        7502 :             if (flags->info & inf->bit)
                                668                 :                :             {
 1894 tgl@sss.pgh.pa.us         669                 :UBC           0 :                 snprintf(buf, sizeof(buf), "missing %s!", inf->text);
                                670                 :              0 :                 elems[nresults++] = PointerGetDatum(cstring_to_text(buf));
                                671                 :                :             }
                                672                 :                :         }
                                673                 :                :     }
                                674                 :                : 
                                675                 :                :     /* And form an array */
 1894 tgl@sss.pgh.pa.us         676                 :CBC         590 :     dims[0] = nresults;
                                677                 :            590 :     lbs[0] = 1;
                                678                 :                :     /* XXX: this hardcodes assumptions about the text type */
                                679                 :            590 :     return construct_md_array(elems, NULL, 1, dims, lbs,
                                680                 :                :                               TEXTOID, -1, false, TYPALIGN_INT);
                                681                 :                : }
                                682                 :                : 
                                683                 :                : /*
                                684                 :                :  * build_test_match_result - build output array for current match
                                685                 :                :  *
                                686                 :                :  * Note that if the indices flag is set, we don't need any strings,
                                687                 :                :  * just the location data.
                                688                 :                :  */
                                689                 :                : static ArrayType *
                                690                 :            481 : build_test_match_result(test_regex_ctx *matchctx)
                                691                 :                : {
                                692                 :            481 :     char       *buf = matchctx->conv_buf;
                                693                 :            481 :     Datum      *elems = matchctx->elems;
                                694                 :            481 :     bool       *nulls = matchctx->nulls;
                                695                 :            481 :     bool        indices = matchctx->re_flags.indices;
                                696                 :                :     char        bufstr[80];
                                697                 :                :     int         dims[1];
                                698                 :                :     int         lbs[1];
                                699                 :                :     int         loc;
                                700                 :                :     int         i;
                                701                 :                : 
                                702                 :                :     /* Extract matching substrings from the original string */
                                703                 :            481 :     loc = matchctx->next_match * matchctx->npatterns * 2;
                                704         [ +  + ]:           1132 :     for (i = 0; i < matchctx->npatterns; i++)
                                705                 :                :     {
                                706                 :            651 :         int         so = matchctx->match_locs[loc++];
                                707                 :            651 :         int         eo = matchctx->match_locs[loc++];
                                708                 :                : 
                                709         [ +  + ]:            651 :         if (indices)
                                710                 :                :         {
                                711                 :                :             /* Report eo this way for consistency with Tcl */
                                712         [ +  + ]:             84 :             snprintf(bufstr, sizeof(bufstr), "%d %d",
                                713                 :                :                      so, so < 0 ? eo : eo - 1);
                                714                 :             84 :             elems[i] = PointerGetDatum(cstring_to_text(bufstr));
                                715                 :             84 :             nulls[i] = false;
                                716                 :                :         }
                                717   [ +  +  -  + ]:            567 :         else if (so < 0 || eo < 0)
                                718                 :                :         {
                                719                 :             12 :             elems[i] = (Datum) 0;
                                720                 :             12 :             nulls[i] = true;
                                721                 :                :         }
                                722         [ +  - ]:            555 :         else if (buf)
                                723                 :                :         {
                                724                 :            555 :             int         len = pg_wchar2mb_with_len(matchctx->wide_str + so,
                                725                 :                :                                                    buf,
                                726                 :                :                                                    eo - so);
                                727                 :                : 
                                728         [ -  + ]:            555 :             Assert(len < matchctx->conv_bufsiz);
                                729                 :            555 :             elems[i] = PointerGetDatum(cstring_to_text_with_len(buf, len));
                                730                 :            555 :             nulls[i] = false;
                                731                 :                :         }
                                732                 :                :         else
                                733                 :                :         {
 1894 tgl@sss.pgh.pa.us         734                 :UBC           0 :             elems[i] = DirectFunctionCall3(text_substr,
                                735                 :                :                                            PointerGetDatum(matchctx->orig_str),
                                736                 :                :                                            Int32GetDatum(so + 1),
                                737                 :                :                                            Int32GetDatum(eo - so));
                                738                 :              0 :             nulls[i] = false;
                                739                 :                :         }
                                740                 :                :     }
                                741                 :                : 
                                742                 :                :     /* In EXPECT indices mode, also report the "details" */
 1894 tgl@sss.pgh.pa.us         743   [ +  +  +  + ]:CBC         481 :     if (indices && (matchctx->re_flags.cflags & REG_EXPECT))
                                744                 :                :     {
                                745                 :             28 :         int         so = matchctx->details.rm_extend.rm_so;
                                746                 :             28 :         int         eo = matchctx->details.rm_extend.rm_eo;
                                747                 :                : 
                                748         [ +  + ]:             28 :         snprintf(bufstr, sizeof(bufstr), "%d %d",
                                749                 :                :                  so, so < 0 ? eo : eo - 1);
                                750                 :             28 :         elems[i] = PointerGetDatum(cstring_to_text(bufstr));
                                751                 :             28 :         nulls[i] = false;
                                752                 :             28 :         i++;
                                753                 :                :     }
                                754                 :                : 
                                755                 :                :     /* And form an array */
                                756                 :            481 :     dims[0] = i;
                                757                 :            481 :     lbs[0] = 1;
                                758                 :                :     /* XXX: this hardcodes assumptions about the text type */
                                759                 :            481 :     return construct_md_array(elems, nulls, 1, dims, lbs,
                                760                 :                :                               TEXTOID, -1, false, TYPALIGN_INT);
                                761                 :                : }

Generated by: LCOV version 2.4-beta