Age Owner Branch data TLA Line data Source code
1 : : /*-------------------------------------------------------------------------
2 : : *
3 : : * ts_locale.c
4 : : * locale compatibility layer for tsearch
5 : : *
6 : : * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
7 : : *
8 : : *
9 : : * IDENTIFICATION
10 : : * src/backend/tsearch/ts_locale.c
11 : : *
12 : : *-------------------------------------------------------------------------
13 : : */
14 : : #include "postgres.h"
15 : :
16 : : #include "common/string.h"
17 : : #include "storage/fd.h"
18 : : #include "tsearch/ts_locale.h"
19 : :
20 : : static void tsearch_readline_callback(void *arg);
21 : :
22 : :
23 : : /* space for a single character plus a trailing NUL */
24 : : #define WC_BUF_LEN 2
25 : :
26 : : int
6562 tgl@sss.pgh.pa.us 27 :CBC 5142 : t_isalpha(const char *ptr)
28 : : {
29 : : pg_wchar wstr[WC_BUF_LEN];
30 : : int wlen pg_attribute_unused();
31 : :
6 jdavis@postgresql.or 32 :GNC 5142 : wlen = pg_mb2wchar_with_len(ptr, wstr, pg_mblen(ptr));
33 [ - + ]: 5142 : Assert(wlen <= 1);
34 : :
35 : : /* pass single character, or NUL if empty */
36 : 5142 : return pg_iswalpha(wstr[0], pg_database_locale());
37 : : }
38 : :
39 : : int
1117 tgl@sss.pgh.pa.us 40 :CBC 1395986 : t_isalnum(const char *ptr)
41 : : {
42 : : pg_wchar wstr[WC_BUF_LEN];
43 : : int wlen pg_attribute_unused();
44 : :
6 jdavis@postgresql.or 45 :GNC 1395986 : wlen = pg_mb2wchar_with_len(ptr, wstr, pg_mblen(ptr));
46 [ - + ]: 1395986 : Assert(wlen <= 1);
47 : :
48 : : /* pass single character, or NUL if empty */
49 : 1395986 : return pg_iswalnum(wstr[0], pg_database_locale());
50 : : }
51 : :
52 : :
53 : : /*
54 : : * Set up to read a file using tsearch_readline(). This facility is
55 : : * better than just reading the file directly because it provides error
56 : : * context pointing to the specific line where a problem is detected.
57 : : *
58 : : * Expected usage is:
59 : : *
60 : : * tsearch_readline_state trst;
61 : : *
62 : : * if (!tsearch_readline_begin(&trst, filename))
63 : : * ereport(ERROR,
64 : : * (errcode(ERRCODE_CONFIG_FILE_ERROR),
65 : : * errmsg("could not open stop-word file \"%s\": %m",
66 : : * filename)));
67 : : * while ((line = tsearch_readline(&trst)) != NULL)
68 : : * process line;
69 : : * tsearch_readline_end(&trst);
70 : : *
71 : : * Note that the caller supplies the ereport() for file open failure;
72 : : * this is so that a custom message can be provided. The filename string
73 : : * passed to tsearch_readline_begin() must remain valid through
74 : : * tsearch_readline_end().
75 : : */
76 : : bool
6340 tgl@sss.pgh.pa.us 77 :CBC 278 : tsearch_readline_begin(tsearch_readline_state *stp,
78 : : const char *filename)
79 : : {
80 [ - + ]: 278 : if ((stp->fp = AllocateFile(filename, "r")) == NULL)
6340 tgl@sss.pgh.pa.us 81 :UBC 0 : return false;
6340 tgl@sss.pgh.pa.us 82 :CBC 278 : stp->filename = filename;
83 : 278 : stp->lineno = 0;
1860 84 : 278 : initStringInfo(&stp->buf);
6340 85 : 278 : stp->curline = NULL;
86 : : /* Setup error traceback support for ereport() */
87 : 278 : stp->cb.callback = tsearch_readline_callback;
333 peter@eisentraut.org 88 : 278 : stp->cb.arg = stp;
6340 tgl@sss.pgh.pa.us 89 : 278 : stp->cb.previous = error_context_stack;
90 : 278 : error_context_stack = &stp->cb;
91 : 278 : return true;
92 : : }
93 : :
94 : : /*
95 : : * Read the next line from a tsearch data file (expected to be in UTF-8), and
96 : : * convert it to database encoding if needed. The returned string is palloc'd.
97 : : * NULL return means EOF.
98 : : */
99 : : char *
100 : 12733 : tsearch_readline(tsearch_readline_state *stp)
101 : : {
102 : : char *recoded;
103 : :
104 : : /* Advance line number to use in error reports */
105 : 12733 : stp->lineno++;
106 : :
107 : : /* Clear curline, it's no longer relevant */
1860 108 [ + + ]: 12733 : if (stp->curline)
109 : : {
110 [ - + ]: 12455 : if (stp->curline != stp->buf.data)
1860 tgl@sss.pgh.pa.us 111 :UBC 0 : pfree(stp->curline);
1860 tgl@sss.pgh.pa.us 112 :CBC 12455 : stp->curline = NULL;
113 : : }
114 : :
115 : : /* Collect next line, if there is one */
116 [ + + ]: 12733 : if (!pg_get_line_buf(stp->fp, &stp->buf))
117 : 235 : return NULL;
118 : :
119 : : /* Validate the input as UTF-8, then convert to DB encoding if needed */
120 : 12498 : recoded = pg_any_to_server(stp->buf.data, stp->buf.len, PG_UTF8);
121 : :
122 : : /* Save the correctly-encoded string for possible error reports */
123 : 12498 : stp->curline = recoded; /* might be equal to buf.data */
124 : :
125 : : /*
126 : : * We always return a freshly pstrdup'd string. This is clearly necessary
127 : : * if pg_any_to_server() returned buf.data, and we need a second copy even
128 : : * if encoding conversion did occur. The caller is entitled to pfree the
129 : : * returned string at any time, which would leave curline pointing to
130 : : * recycled storage, causing problems if an error occurs after that point.
131 : : * (It's preferable to return the result of pstrdup instead of the output
132 : : * of pg_any_to_server, because the conversion result tends to be
133 : : * over-allocated. Since callers might save the result string directly
134 : : * into a long-lived dictionary structure, we don't want it to be a larger
135 : : * palloc chunk than necessary. We'll reclaim the conversion result on
136 : : * the next call.)
137 : : */
138 : 12498 : return pstrdup(recoded);
139 : : }
140 : :
141 : : /*
142 : : * Close down after reading a file with tsearch_readline()
143 : : */
144 : : void
6340 145 : 278 : tsearch_readline_end(tsearch_readline_state *stp)
146 : : {
147 : : /* Suppress use of curline in any error reported below */
1860 148 [ + + ]: 278 : if (stp->curline)
149 : : {
150 [ - + ]: 43 : if (stp->curline != stp->buf.data)
1860 tgl@sss.pgh.pa.us 151 :UBC 0 : pfree(stp->curline);
1860 tgl@sss.pgh.pa.us 152 :CBC 43 : stp->curline = NULL;
153 : : }
154 : :
155 : : /* Release other resources */
156 : 278 : pfree(stp->buf.data);
6340 157 : 278 : FreeFile(stp->fp);
158 : :
159 : : /* Pop the error context stack */
160 : 278 : error_context_stack = stp->cb.previous;
161 : 278 : }
162 : :
163 : : /*
164 : : * Error context callback for errors occurring while reading a tsearch
165 : : * configuration file.
166 : : */
167 : : static void
6340 tgl@sss.pgh.pa.us 168 :UBC 0 : tsearch_readline_callback(void *arg)
169 : : {
170 : 0 : tsearch_readline_state *stp = (tsearch_readline_state *) arg;
171 : :
172 : : /*
173 : : * We can't include the text of the config line for errors that occur
174 : : * during tsearch_readline() itself. The major cause of such errors is
175 : : * encoding violations, and we daren't try to print error messages
176 : : * containing badly-encoded data.
177 : : */
178 [ # # ]: 0 : if (stp->curline)
179 : 0 : errcontext("line %d of configuration file \"%s\": \"%s\"",
180 : : stp->lineno,
181 : : stp->filename,
182 : : stp->curline);
183 : : else
184 : 0 : errcontext("line %d of configuration file \"%s\"",
185 : : stp->lineno,
186 : : stp->filename);
187 : 0 : }
|