Age Owner Branch data TLA Line data Source code
1 : : /*
2 : : * contrib/hstore/hstore_compat.c
3 : : *
4 : : * Notes on old/new hstore format disambiguation.
5 : : *
6 : : * There are three formats to consider:
7 : : * 1) old contrib/hstore (referred to as hstore-old)
8 : : * 2) prerelease pgfoundry hstore
9 : : * 3) new contrib/hstore
10 : : *
11 : : * (2) and (3) are identical except for the HS_FLAG_NEWVERSION
12 : : * bit, which is set in (3) but not (2).
13 : : *
14 : : * Values that are already in format (3), or which are
15 : : * unambiguously in format (2), are handled by the first
16 : : * "return immediately" test in hstoreUpgrade().
17 : : *
18 : : * To stress a point: we ONLY get here with possibly-ambiguous
19 : : * values if we're doing some sort of in-place migration from an
20 : : * old prerelease pgfoundry hstore-new; and we explicitly don't
21 : : * support that without fixing up any potentially padded values
22 : : * first. Most of the code here is serious overkill, but the
23 : : * performance penalty isn't serious (especially compared to the
24 : : * palloc() that we have to do anyway) and the belt-and-braces
25 : : * validity checks provide some reassurance. (If for some reason
26 : : * we get a value that would have worked on the old code, but
27 : : * which would be botched by the conversion code, the validity
28 : : * checks will fail it first so we get an error rather than bad
29 : : * data.)
30 : : *
31 : : * Note also that empty hstores are the same in (2) and (3), so
32 : : * there are some special-case paths for them.
33 : : *
34 : : * We tell the difference between formats (2) and (3) as follows (but
35 : : * note that there are some edge cases where we can't tell; see
36 : : * comments in hstoreUpgrade):
37 : : *
38 : : * First, since there must be at least one entry, we look at
39 : : * how the bits line up. The new format looks like:
40 : : *
41 : : * 10kkkkkkkkkkkkkkkkkkkkkkkkkkkkkk (k..k = keylen)
42 : : * 0nvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv (v..v = keylen+vallen)
43 : : *
44 : : * The old format looks like one of these, depending on endianness
45 : : * and bitfield layout: (k..k = keylen, v..v = vallen, p..p = pos,
46 : : * n = isnull)
47 : : *
48 : : * kkkkkkkkkkkkkkkkvvvvvvvvvvvvvvvv
49 : : * nppppppppppppppppppppppppppppppp
50 : : *
51 : : * kkkkkkkkkkkkkkkkvvvvvvvvvvvvvvvv
52 : : * pppppppppppppppppppppppppppppppn
53 : : *
54 : : * vvvvvvvvvvvvvvvvkkkkkkkkkkkkkkkk
55 : : * nppppppppppppppppppppppppppppppp
56 : : *
57 : : * vvvvvvvvvvvvvvvvkkkkkkkkkkkkkkkk
58 : : * pppppppppppppppppppppppppppppppn (usual i386 format)
59 : : *
60 : : * If the entry is in old format, for the first entry "pos" must be 0.
61 : : * We can obviously see that either keylen or vallen must be >32768
62 : : * for there to be any ambiguity (which is why lengths less than that
63 : : * are fasttracked in hstore.h). Since "pos"==0, the "v" field in the
64 : : * new-format interpretation can only be 0 or 1, which constrains all
65 : : * but three bits of the old-format's k and v fields. But in addition
66 : : * to all of this, the data length implied by the keylen and vallen
67 : : * must fit in the varlena size. So the only ambiguous edge case for
68 : : * hstores with only one entry occurs between a new-format entry with
69 : : * an excess (~32k) of padding, and an old-format entry. But we know
70 : : * which format to use in that case based on how we were compiled, so
71 : : * no actual data corruption can occur.
72 : : *
73 : : * If there is more than one entry, the requirement that keys do not
74 : : * decrease in length, and that positions increase contiguously, and
75 : : * that the end of the data not be beyond the end of the varlena
76 : : * itself, disambiguates in almost all other cases. There is a small
77 : : * set of ambiguous cases which could occur if the old-format value
78 : : * has a large excess of padding and just the right pattern of key
79 : : * sizes, but these are also handled based on how we were compiled.
80 : : *
81 : : * The otherwise undocumented function hstore_version_diag is provided
82 : : * for testing purposes.
83 : : */
84 : : #include "postgres.h"
85 : :
86 : :
87 : : #include "hstore.h"
88 : :
89 : : /*
90 : : * This is the structure used for entries in the old contrib/hstore
91 : : * implementation. Notice that this is the same size as the new entry
92 : : * (two 32-bit words per key/value pair) and that the header is the
93 : : * same, so the old and new versions of ARRPTR, STRPTR, CALCDATASIZE
94 : : * etc. are compatible.
95 : : *
96 : : * If the above statement isn't true on some bizarre platform, we're
97 : : * a bit hosed.
98 : : */
99 : : typedef struct
100 : : {
101 : : uint16 keylen;
102 : : uint16 vallen;
103 : : uint32
104 : : valisnull:1,
105 : : pos:31;
106 : : } HOldEntry;
107 : :
108 : : StaticAssertDecl(sizeof(HOldEntry) == 2 * sizeof(HEntry),
109 : : "old hstore format is not upward-compatible");
110 : :
111 : : static int hstoreValidNewFormat(HStore *hs);
112 : : static int hstoreValidOldFormat(HStore *hs);
113 : :
114 : :
115 : : /*
116 : : * Validity test for a new-format hstore.
117 : : * 0 = not valid
118 : : * 1 = valid but with "slop" in the length
119 : : * 2 = exactly valid
120 : : */
121 : : static int
5921 tgl@sss.pgh.pa.us 122 :UBC 0 : hstoreValidNewFormat(HStore *hs)
123 : : {
5772 bruce@momjian.us 124 : 0 : int count = HS_COUNT(hs);
125 : 0 : HEntry *entries = ARRPTR(hs);
126 [ # # ]: 0 : int buflen = (count) ? HSE_ENDPOS(entries[2 * (count) - 1]) : 0;
127 : 0 : int vsize = CALCDATASIZE(count, buflen);
128 : : int i;
129 : :
5921 tgl@sss.pgh.pa.us 130 [ # # ]: 0 : if (hs->size_ & HS_FLAG_NEWVERSION)
131 : 0 : return 2;
132 : :
133 [ # # ]: 0 : if (count == 0)
134 : 0 : return 2;
135 : :
136 [ # # ]: 0 : if (!HSE_ISFIRST(entries[0]))
137 : 0 : return 0;
138 : :
139 [ # # ]: 0 : if (vsize > VARSIZE(hs))
140 : 0 : return 0;
141 : :
142 : : /* entry position must be nondecreasing */
143 : :
5772 bruce@momjian.us 144 [ # # ]: 0 : for (i = 1; i < 2 * count; ++i)
145 : : {
5921 tgl@sss.pgh.pa.us 146 [ # # ]: 0 : if (HSE_ISFIRST(entries[i])
5772 bruce@momjian.us 147 [ # # ]: 0 : || (HSE_ENDPOS(entries[i]) < HSE_ENDPOS(entries[i - 1])))
5921 tgl@sss.pgh.pa.us 148 : 0 : return 0;
149 : : }
150 : :
151 : : /* key length must be nondecreasing and keys must not be null */
152 : :
153 [ # # ]: 0 : for (i = 1; i < count; ++i)
154 : : {
3680 155 [ # # # # : 0 : if (HSTORE_KEYLEN(entries, i) < HSTORE_KEYLEN(entries, i - 1))
# # ]
5921 156 : 0 : return 0;
5772 bruce@momjian.us 157 [ # # ]: 0 : if (HSE_ISNULL(entries[2 * i]))
5921 tgl@sss.pgh.pa.us 158 : 0 : return 0;
159 : : }
160 : :
161 [ # # ]: 0 : if (vsize != VARSIZE(hs))
162 : 0 : return 1;
163 : :
164 : 0 : return 2;
165 : : }
166 : :
167 : : /*
168 : : * Validity test for an old-format hstore.
169 : : * 0 = not valid
170 : : * 1 = valid but with "slop" in the length
171 : : * 2 = exactly valid
172 : : */
173 : : static int
174 : 0 : hstoreValidOldFormat(HStore *hs)
175 : : {
5772 bruce@momjian.us 176 : 0 : int count = hs->size_;
177 : 0 : HOldEntry *entries = (HOldEntry *) ARRPTR(hs);
178 : : int vsize;
179 : 0 : int lastpos = 0;
180 : : int i;
181 : :
5921 tgl@sss.pgh.pa.us 182 [ # # ]: 0 : if (hs->size_ & HS_FLAG_NEWVERSION)
183 : 0 : return 0;
184 : :
185 [ # # ]: 0 : if (count == 0)
186 : 0 : return 2;
187 : :
188 [ # # ]: 0 : if (count > 0xFFFFFFF)
189 : 0 : return 0;
190 : :
5772 bruce@momjian.us 191 [ # # ]: 0 : if (CALCDATASIZE(count, 0) > VARSIZE(hs))
5921 tgl@sss.pgh.pa.us 192 : 0 : return 0;
193 : :
194 [ # # ]: 0 : if (entries[0].pos != 0)
195 : 0 : return 0;
196 : :
197 : : /* key length must be nondecreasing */
198 : :
199 [ # # ]: 0 : for (i = 1; i < count; ++i)
200 : : {
5772 bruce@momjian.us 201 [ # # ]: 0 : if (entries[i].keylen < entries[i - 1].keylen)
5921 tgl@sss.pgh.pa.us 202 : 0 : return 0;
203 : : }
204 : :
205 : : /*
206 : : * entry position must be strictly increasing, except for the first entry
207 : : * (which can be ""=>"" and thus zero-length); and all entries must be
208 : : * properly contiguous
209 : : */
210 : :
211 [ # # ]: 0 : for (i = 0; i < count; ++i)
212 : : {
213 [ # # ]: 0 : if (entries[i].pos != lastpos)
214 : 0 : return 0;
215 : 0 : lastpos += (entries[i].keylen
216 [ # # ]: 0 : + ((entries[i].valisnull) ? 0 : entries[i].vallen));
217 : : }
218 : :
5772 bruce@momjian.us 219 : 0 : vsize = CALCDATASIZE(count, lastpos);
220 : :
5921 tgl@sss.pgh.pa.us 221 [ # # ]: 0 : if (vsize > VARSIZE(hs))
222 : 0 : return 0;
223 : :
224 [ # # ]: 0 : if (vsize != VARSIZE(hs))
225 : 0 : return 1;
226 : :
227 : 0 : return 2;
228 : : }
229 : :
230 : :
231 : : /*
232 : : * hstoreUpgrade: PG_DETOAST_DATUM plus support for conversion of old hstores
233 : : */
234 : : HStore *
5921 tgl@sss.pgh.pa.us 235 :CBC 110034 : hstoreUpgrade(Datum orig)
236 : : {
237 : 110034 : HStore *hs = (HStore *) PG_DETOAST_DATUM(orig);
238 : : int valid_new;
239 : : int valid_old;
240 : :
241 : : /* Return immediately if no conversion needed */
2579 rhodiumtoad@postgres 242 [ + - ]: 110034 : if (hs->size_ & HS_FLAG_NEWVERSION)
243 : 110034 : return hs;
244 : :
245 : : /* Do we have a writable copy? If not, make one. */
2579 rhodiumtoad@postgres 246 [ # # ]:UBC 0 : if ((void *) hs == (void *) DatumGetPointer(orig))
247 : 0 : hs = (HStore *) PG_DETOAST_DATUM_COPY(orig);
248 : :
249 [ # # # # ]: 0 : if (hs->size_ == 0 ||
5921 tgl@sss.pgh.pa.us 250 [ # # # # ]: 0 : (VARSIZE(hs) < 32768 && HSE_ISFIRST((ARRPTR(hs)[0]))))
251 : : {
2579 rhodiumtoad@postgres 252 : 0 : HS_SETCOUNT(hs, HS_COUNT(hs));
253 [ # # ]: 0 : HS_FIXSIZE(hs, HS_COUNT(hs));
5921 tgl@sss.pgh.pa.us 254 : 0 : return hs;
255 : : }
256 : :
257 : 0 : valid_new = hstoreValidNewFormat(hs);
258 : 0 : valid_old = hstoreValidOldFormat(hs);
259 : :
260 [ # # # # ]: 0 : if (!valid_old || hs->size_ == 0)
261 : : {
262 [ # # ]: 0 : if (valid_new)
263 : : {
264 : : /*
265 : : * force the "new version" flag and the correct varlena length.
266 : : */
2579 rhodiumtoad@postgres 267 : 0 : HS_SETCOUNT(hs, HS_COUNT(hs));
268 [ # # ]: 0 : HS_FIXSIZE(hs, HS_COUNT(hs));
5921 tgl@sss.pgh.pa.us 269 : 0 : return hs;
270 : : }
271 : : else
272 : : {
5772 bruce@momjian.us 273 [ # # ]: 0 : elog(ERROR, "invalid hstore value found");
274 : : }
275 : : }
276 : :
277 : : /*
278 : : * this is the tricky edge case. It is only possible in some quite extreme
279 : : * cases (the hstore must have had a lot of wasted padding space at the
280 : : * end). But the only way a "new" hstore value could get here is if we're
281 : : * upgrading in place from a pre-release version of hstore-new (NOT
282 : : * contrib/hstore), so we work off the following assumptions: 1. If you're
283 : : * moving from old contrib/hstore to hstore-new, you're required to fix up
284 : : * any potential conflicts first, e.g. by running ALTER TABLE ... USING
285 : : * col::text::hstore; on all hstore columns before upgrading. 2. If you're
286 : : * moving from old contrib/hstore to new contrib/hstore, then "new" values
287 : : * are impossible here 3. If you're moving from pre-release hstore-new to
288 : : * hstore-new, then "old" values are impossible here 4. If you're moving
289 : : * from pre-release hstore-new to new contrib/hstore, you're not doing so
290 : : * as an in-place upgrade, so there is no issue So the upshot of all this
291 : : * is that we can treat all the edge cases as "new" if we're being built
292 : : * as hstore-new, and "old" if we're being built as contrib/hstore.
293 : : *
294 : : * XXX the WARNING can probably be downgraded to DEBUG1 once this has been
295 : : * beta-tested. But for now, it would be very useful to know if anyone can
296 : : * actually reach this case in a non-contrived setting.
297 : : */
298 : :
5921 tgl@sss.pgh.pa.us 299 [ # # ]: 0 : if (valid_new)
300 : : {
301 : : #ifdef HSTORE_IS_HSTORE_NEW
302 : : elog(WARNING, "ambiguous hstore value resolved as hstore-new");
303 : :
304 : : /*
305 : : * force the "new version" flag and the correct varlena length.
306 : : */
307 : : HS_SETCOUNT(hs, HS_COUNT(hs));
308 : : HS_FIXSIZE(hs, HS_COUNT(hs));
309 : : return hs;
310 : : #else
5772 bruce@momjian.us 311 [ # # ]: 0 : elog(WARNING, "ambiguous hstore value resolved as hstore-old");
312 : : #endif
313 : : }
314 : :
315 : : /*
316 : : * must have an old-style value. Overwrite it in place as a new-style one.
317 : : */
318 : : {
319 : 0 : int count = hs->size_;
320 : 0 : HEntry *new_entries = ARRPTR(hs);
321 : 0 : HOldEntry *old_entries = (HOldEntry *) ARRPTR(hs);
322 : : int i;
323 : :
5921 tgl@sss.pgh.pa.us 324 [ # # ]: 0 : for (i = 0; i < count; ++i)
325 : : {
5772 bruce@momjian.us 326 : 0 : uint32 pos = old_entries[i].pos;
327 : 0 : uint32 keylen = old_entries[i].keylen;
328 : 0 : uint32 vallen = old_entries[i].vallen;
329 : 0 : bool isnull = old_entries[i].valisnull;
330 : :
5921 tgl@sss.pgh.pa.us 331 [ # # ]: 0 : if (isnull)
332 : 0 : vallen = 0;
333 : :
5772 bruce@momjian.us 334 : 0 : new_entries[2 * i].entry = (pos + keylen) & HENTRY_POSMASK;
335 : 0 : new_entries[2 * i + 1].entry = (((pos + keylen + vallen) & HENTRY_POSMASK)
336 [ # # ]: 0 : | ((isnull) ? HENTRY_ISNULL : 0));
337 : : }
338 : :
5921 tgl@sss.pgh.pa.us 339 [ # # ]: 0 : if (count)
340 : 0 : new_entries[0].entry |= HENTRY_ISFIRST;
5772 bruce@momjian.us 341 : 0 : HS_SETCOUNT(hs, count);
342 [ # # ]: 0 : HS_FIXSIZE(hs, count);
343 : : }
344 : :
5921 tgl@sss.pgh.pa.us 345 : 0 : return hs;
346 : : }
347 : :
348 : :
5921 tgl@sss.pgh.pa.us 349 :CBC 7 : PG_FUNCTION_INFO_V1(hstore_version_diag);
350 : : Datum
5921 tgl@sss.pgh.pa.us 351 :UBC 0 : hstore_version_diag(PG_FUNCTION_ARGS)
352 : : {
5772 bruce@momjian.us 353 : 0 : HStore *hs = (HStore *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
354 : 0 : int valid_new = hstoreValidNewFormat(hs);
355 : 0 : int valid_old = hstoreValidOldFormat(hs);
356 : :
357 : 0 : PG_RETURN_INT32(valid_old * 10 + valid_new);
358 : : }
|