Age Owner Branch data TLA Line data Source code
1 : : /*-------------------------------------------------------------------------
2 : : *
3 : : * gist.c
4 : : * interface routines for the postgres GiST index access method.
5 : : *
6 : : *
7 : : * Portions Copyright (c) 1996-2026, PostgreSQL Global Development Group
8 : : * Portions Copyright (c) 1994, Regents of the University of California
9 : : *
10 : : * IDENTIFICATION
11 : : * src/backend/access/gist/gist.c
12 : : *
13 : : *-------------------------------------------------------------------------
14 : : */
15 : : #include "postgres.h"
16 : :
17 : : #include "access/gist_private.h"
18 : : #include "access/gistscan.h"
19 : : #include "access/xloginsert.h"
20 : : #include "catalog/pg_collation.h"
21 : : #include "commands/vacuum.h"
22 : : #include "miscadmin.h"
23 : : #include "nodes/execnodes.h"
24 : : #include "storage/predicate.h"
25 : : #include "utils/fmgrprotos.h"
26 : : #include "utils/index_selfuncs.h"
27 : : #include "utils/memutils.h"
28 : : #include "utils/rel.h"
29 : :
30 : : /* non-export function prototypes */
31 : : static void gistfixsplit(GISTInsertState *state, GISTSTATE *giststate);
32 : : static bool gistinserttuple(GISTInsertState *state, GISTInsertStack *stack,
33 : : GISTSTATE *giststate, IndexTuple tuple, OffsetNumber oldoffnum);
34 : : static bool gistinserttuples(GISTInsertState *state, GISTInsertStack *stack,
35 : : GISTSTATE *giststate,
36 : : IndexTuple *tuples, int ntup, OffsetNumber oldoffnum,
37 : : Buffer leftchild, Buffer rightchild,
38 : : bool unlockbuf, bool unlockleftchild);
39 : : static void gistfinishsplit(GISTInsertState *state, GISTInsertStack *stack,
40 : : GISTSTATE *giststate, List *splitinfo, bool unlockbuf);
41 : : static void gistprunepage(Relation rel, Page page, Buffer buffer,
42 : : Relation heapRel);
43 : :
44 : :
45 : : #define ROTATEDIST(d) do { \
46 : : SplitPageLayout *tmp = palloc0_object(SplitPageLayout); \
47 : : tmp->block.blkno = InvalidBlockNumber; \
48 : : tmp->buffer = InvalidBuffer; \
49 : : tmp->next = (d); \
50 : : (d)=tmp; \
51 : : } while(0)
52 : :
53 : :
54 : : /*
55 : : * GiST handler function: return IndexAmRoutine with access method parameters
56 : : * and callbacks.
57 : : */
58 : : Datum
3710 tgl@sss.pgh.pa.us 59 :CBC 7252 : gisthandler(PG_FUNCTION_ARGS)
60 : : {
61 : : static const IndexAmRoutine amroutine = {
62 : : .type = T_IndexAmRoutine,
63 : : .amstrategies = 0,
64 : : .amsupport = GISTNProcs,
65 : : .amoptsprocnum = GIST_OPTIONS_PROC,
66 : : .amcanorder = false,
67 : : .amcanorderbyop = true,
68 : : .amcanhash = false,
69 : : .amconsistentequality = false,
70 : : .amconsistentordering = false,
71 : : .amcanbackward = false,
72 : : .amcanunique = false,
73 : : .amcanmulticol = true,
74 : : .amoptionalkey = true,
75 : : .amsearcharray = false,
76 : : .amsearchnulls = true,
77 : : .amstorage = true,
78 : : .amclusterable = true,
79 : : .ampredlocks = true,
80 : : .amcanparallel = false,
81 : : .amcanbuildparallel = false,
82 : : .amcaninclude = true,
83 : : .amusemaintenanceworkmem = false,
84 : : .amsummarizing = false,
85 : : .amparallelvacuumoptions =
86 : : VACUUM_OPTION_PARALLEL_BULKDEL | VACUUM_OPTION_PARALLEL_COND_CLEANUP,
87 : : .amkeytype = InvalidOid,
88 : :
89 : : .ambuild = gistbuild,
90 : : .ambuildempty = gistbuildempty,
91 : : .aminsert = gistinsert,
92 : : .aminsertcleanup = NULL,
93 : : .ambulkdelete = gistbulkdelete,
94 : : .amvacuumcleanup = gistvacuumcleanup,
95 : : .amcanreturn = gistcanreturn,
96 : : .amcostestimate = gistcostestimate,
97 : : .amgettreeheight = NULL,
98 : : .amoptions = gistoptions,
99 : : .amproperty = gistproperty,
100 : : .ambuildphasename = NULL,
101 : : .amvalidate = gistvalidate,
102 : : .amadjustmembers = gistadjustmembers,
103 : : .ambeginscan = gistbeginscan,
104 : : .amrescan = gistrescan,
105 : : .amgettuple = gistgettuple,
106 : : .amgetbitmap = gistgetbitmap,
107 : : .amendscan = gistendscan,
108 : : .ammarkpos = NULL,
109 : : .amrestrpos = NULL,
110 : : .amestimateparallelscan = NULL,
111 : : .aminitparallelscan = NULL,
112 : : .amparallelrescan = NULL,
113 : : .amtranslatestrategy = NULL,
114 : : .amtranslatecmptype = gisttranslatecmptype,
115 : : };
116 : :
75 tgl@sss.pgh.pa.us 117 :GNC 7252 : PG_RETURN_POINTER(&amroutine);
118 : : }
119 : :
120 : : /*
121 : : * Create and return a temporary memory context for use by GiST. We
122 : : * _always_ invoke user-provided methods in a temporary memory
123 : : * context, so that memory leaks in those functions cannot cause
124 : : * problems. Also, we use some additional temporary contexts in the
125 : : * GiST code itself, to avoid the need to do some awkward manual
126 : : * memory management.
127 : : */
128 : : MemoryContext
7479 bruce@momjian.us 129 :CBC 4902 : createTempGistContext(void)
130 : : {
131 : 4902 : return AllocSetContextCreate(CurrentMemoryContext,
132 : : "GiST temporary context",
133 : : ALLOCSET_DEFAULT_SIZES);
134 : : }
135 : :
136 : : /*
137 : : * gistbuildempty() -- build an empty gist index in the initialization fork
138 : : */
139 : : void
3710 tgl@sss.pgh.pa.us 140 : 5 : gistbuildempty(Relation index)
141 : : {
142 : : Buffer buffer;
143 : :
144 : : /* Initialize the root page */
935 tmunro@postgresql.or 145 : 5 : buffer = ExtendBufferedRel(BMR_REL(index), INIT_FORKNUM, NULL,
146 : : EB_SKIP_EXTENSION_LOCK | EB_LOCK_FIRST);
147 : :
148 : : /* Initialize and xlog buffer */
4780 heikki.linnakangas@i 149 : 5 : START_CRIT_SECTION();
150 : 5 : GISTInitBuffer(buffer, F_LEAF);
151 : 5 : MarkBufferDirty(buffer);
4484 152 : 5 : log_newpage_buffer(buffer, true);
4780 153 [ - + ]: 5 : END_CRIT_SECTION();
154 : :
155 : : /* Unlock and release the buffer */
156 : 5 : UnlockReleaseBuffer(buffer);
5555 rhaas@postgresql.org 157 : 5 : }
158 : :
159 : : /*
160 : : * gistinsert -- wrapper for GiST tuple insertion.
161 : : *
162 : : * This is the public interface routine for tuple insertion in GiSTs.
163 : : * It doesn't do any work; just locks the relation and passes the buck.
164 : : */
165 : : bool
3710 tgl@sss.pgh.pa.us 166 : 152195 : gistinsert(Relation r, Datum *values, bool *isnull,
167 : : ItemPointer ht_ctid, Relation heapRel,
168 : : IndexUniqueCheck checkUnique,
169 : : bool indexUnchanged,
170 : : IndexInfo *indexInfo)
171 : : {
3321 172 : 152195 : GISTSTATE *giststate = (GISTSTATE *) indexInfo->ii_AmCache;
173 : : IndexTuple itup;
174 : : MemoryContext oldCxt;
175 : :
176 : : /* Initialize GISTSTATE cache if first call in this statement */
177 [ + + ]: 152195 : if (giststate == NULL)
178 : : {
179 : 960 : oldCxt = MemoryContextSwitchTo(indexInfo->ii_Context);
180 : 960 : giststate = initGISTstate(r);
181 : 960 : giststate->tempCxt = createTempGistContext();
472 peter@eisentraut.org 182 : 960 : indexInfo->ii_AmCache = giststate;
3321 tgl@sss.pgh.pa.us 183 : 960 : MemoryContextSwitchTo(oldCxt);
184 : : }
185 : :
5280 186 : 152195 : oldCxt = MemoryContextSwitchTo(giststate->tempCxt);
187 : :
493 michael@paquier.xyz 188 : 152195 : itup = gistFormTuple(giststate, r, values, isnull, true);
10416 bruce@momjian.us 189 : 152194 : itup->t_tid = *ht_ctid;
190 : :
2538 heikki.linnakangas@i 191 : 152194 : gistdoinsert(r, itup, 0, giststate, heapRel, false);
192 : :
193 : : /* cleanup */
5280 tgl@sss.pgh.pa.us 194 : 152188 : MemoryContextSwitchTo(oldCxt);
3321 195 : 152188 : MemoryContextReset(giststate->tempCxt);
196 : :
3710 197 : 152188 : return false;
198 : : }
199 : :
200 : :
201 : : /*
202 : : * Place tuples from 'itup' to 'buffer'. If 'oldoffnum' is valid, the tuple
203 : : * at that offset is atomically removed along with inserting the new tuples.
204 : : * This is used to replace a tuple with a new one.
205 : : *
206 : : * If 'leftchildbuf' is valid, we're inserting the downlink for the page
207 : : * to the right of 'leftchildbuf', or updating the downlink for 'leftchildbuf'.
208 : : * F_FOLLOW_RIGHT flag on 'leftchildbuf' is cleared and NSN is set.
209 : : *
210 : : * If 'markfollowright' is true and the page is split, the left child is
211 : : * marked with F_FOLLOW_RIGHT flag. That is the normal case. During buffered
212 : : * index build, however, there is no concurrent access and the page splitting
213 : : * is done in a slightly simpler fashion, and false is passed.
214 : : *
215 : : * If there is not enough room on the page, it is split. All the split
216 : : * pages are kept pinned and locked and returned in *splitinfo, the caller
217 : : * is responsible for inserting the downlinks for them. However, if
218 : : * 'buffer' is the root page and it needs to be split, gistplacetopage()
219 : : * performs the split as one atomic operation, and *splitinfo is set to NIL.
220 : : * In that case, we continue to hold the root page locked, and the child
221 : : * pages are released; note that new tuple(s) are *not* on the root page
222 : : * but in one of the new child pages.
223 : : *
224 : : * If 'newblkno' is not NULL, returns the block number of page the first
225 : : * new/updated tuple was inserted to. Usually it's the given page, but could
226 : : * be its right sibling if the page was split.
227 : : *
228 : : * Returns 'true' if the page was split, 'false' otherwise.
229 : : */
230 : : bool
5302 heikki.linnakangas@i 231 : 815729 : gistplacetopage(Relation rel, Size freespace, GISTSTATE *giststate,
232 : : Buffer buffer,
233 : : IndexTuple *itup, int ntup, OffsetNumber oldoffnum,
234 : : BlockNumber *newblkno,
235 : : Buffer leftchildbuf,
236 : : List **splitinfo,
237 : : bool markfollowright,
238 : : Relation heapRel,
239 : : bool is_build)
240 : : {
4959 241 : 815729 : BlockNumber blkno = BufferGetBlockNumber(buffer);
3616 kgrittn@postgresql.o 242 : 815729 : Page page = BufferGetPage(buffer);
5561 heikki.linnakangas@i 243 : 815729 : bool is_leaf = (GistPageIsLeaf(page)) ? true : false;
244 : : XLogRecPtr recptr;
245 : : bool is_split;
246 : :
247 : : /*
248 : : * Refuse to modify a page that's incompletely split. This should not
249 : : * happen because we finish any incomplete splits while we walk down the
250 : : * tree. However, it's remotely possible that another concurrent inserter
251 : : * splits a parent page, and errors out before completing the split. We
252 : : * will just throw an error in that case, and leave any split we had in
253 : : * progress unfinished too. The next insert that comes along will clean up
254 : : * the mess.
255 : : */
256 [ - + ]: 815729 : if (GistFollowRight(page))
5561 heikki.linnakangas@i 257 [ # # ]:UBC 0 : elog(ERROR, "concurrent GiST page split was incomplete");
258 : :
259 : : /* should never try to insert to a deleted page */
1880 heikki.linnakangas@i 260 [ - + ]:CBC 815729 : Assert(!GistPageIsDeleted(page));
261 : :
5561 262 : 815729 : *splitinfo = NIL;
263 : :
264 : : /*
265 : : * if isupdate, remove old key: This node's key has been modified, either
266 : : * because a child split occurred or because we needed to adjust our key
267 : : * for an insert in a child node. Therefore, remove the old version of
268 : : * this node's key.
269 : : *
270 : : * for WAL replay, in the non-split case we handle this by setting up a
271 : : * one-element todelete array; in the split case, it's handled implicitly
272 : : * because the tuple vector passed to gistSplit won't include this tuple.
273 : : */
5302 274 : 815729 : is_split = gistnospace(page, itup, ntup, oldoffnum, freespace);
275 : :
276 : : /*
277 : : * If leaf page is full, try at first to delete dead tuples. And then
278 : : * check again.
279 : : */
3840 teodor@sigaev.ru 280 [ + + + + : 815729 : if (is_split && GistPageIsLeaf(page) && GistPageHasGarbage(page))
- + ]
281 : : {
2567 heikki.linnakangas@i 282 :UBC 0 : gistprunepage(rel, page, buffer, heapRel);
3840 teodor@sigaev.ru 283 : 0 : is_split = gistnospace(page, itup, ntup, oldoffnum, freespace);
284 : : }
285 : :
5561 heikki.linnakangas@i 286 [ + + ]:CBC 815729 : if (is_split)
287 : : {
288 : : /* no space for insertion */
289 : : IndexTuple *itvec;
290 : : int tlen;
803 rhaas@postgresql.org 291 : 12535 : SplitPageLayout *dist = NULL,
292 : : *ptr;
5561 heikki.linnakangas@i 293 : 12535 : BlockNumber oldrlink = InvalidBlockNumber;
45 alvherre@kurilemu.de 294 :GNC 12535 : GistNSN oldnsn = InvalidXLogRecPtr;
295 : : SplitPageLayout rootpg;
296 : : bool is_rootsplit;
297 : : int npage;
298 : :
5561 heikki.linnakangas@i 299 :CBC 12535 : is_rootsplit = (blkno == GIST_ROOT_BLKNO);
300 : :
301 : : /*
302 : : * Form index tuples vector to split. If we're replacing an old tuple,
303 : : * remove the old version from the vector.
304 : : */
305 : 12535 : itvec = gistextractpage(page, &tlen);
306 [ + + + - : 12535 : if (OffsetNumberIsValid(oldoffnum))
+ + ]
307 : : {
308 : : /* on inner page we should remove old tuple */
309 : 2710 : int pos = oldoffnum - FirstOffsetNumber;
310 : :
7102 bruce@momjian.us 311 : 2710 : tlen--;
312 [ + + ]: 2710 : if (pos != tlen)
313 : 1737 : memmove(itvec + pos, itvec + pos + 1, sizeof(IndexTuple) * (tlen - pos));
314 : : }
5561 heikki.linnakangas@i 315 : 12535 : itvec = gistjoinvector(itvec, &tlen, itup, ntup);
5302 316 : 12535 : dist = gistSplit(rel, page, itvec, tlen, giststate);
317 : :
318 : : /*
319 : : * Check that split didn't produce too many pages.
320 : : */
4364 321 : 12535 : npage = 0;
322 [ + + ]: 37648 : for (ptr = dist; ptr; ptr = ptr->next)
323 : 25113 : npage++;
324 : : /* in a root split, we'll add one more page to the list below */
325 [ + + ]: 12535 : if (is_rootsplit)
326 : 178 : npage++;
327 [ - + ]: 12535 : if (npage > GIST_MAX_SPLIT_PAGES)
4364 heikki.linnakangas@i 328 [ # # ]:UBC 0 : elog(ERROR, "GiST page split into too many halves (%d, maximum %d)",
329 : : npage, GIST_MAX_SPLIT_PAGES);
330 : :
331 : : /*
332 : : * Set up pages to work with. Allocate new buffers for all but the
333 : : * leftmost page. The original page becomes the new leftmost page, and
334 : : * is just replaced with the new contents.
335 : : *
336 : : * For a root-split, allocate new buffers for all child pages, the
337 : : * original page is overwritten with new root page containing
338 : : * downlinks to the new child pages.
339 : : */
5561 heikki.linnakangas@i 340 :CBC 12535 : ptr = dist;
341 [ + + ]: 12535 : if (!is_rootsplit)
342 : : {
343 : : /* save old rightlink and NSN */
344 : 12357 : oldrlink = GistPageGetOpaque(page)->rightlink;
4805 345 : 12357 : oldnsn = GistPageGetNSN(page);
346 : :
5561 347 : 12357 : dist->buffer = buffer;
348 : 12357 : dist->block.blkno = BufferGetBlockNumber(buffer);
3616 kgrittn@postgresql.o 349 : 12357 : dist->page = PageGetTempPageCopySpecial(BufferGetPage(buffer));
350 : :
351 : : /* clean all flags except F_LEAF */
7249 teodor@sigaev.ru 352 : 12357 : GistPageGetOpaque(dist->page)->flags = (is_leaf) ? F_LEAF : 0;
353 : :
5561 heikki.linnakangas@i 354 : 12357 : ptr = ptr->next;
355 : : }
356 [ + + ]: 25291 : for (; ptr; ptr = ptr->next)
357 : : {
358 : : /* Allocate new page */
1079 andres@anarazel.de 359 : 12756 : ptr->buffer = gistNewBuffer(rel, heapRel);
5561 heikki.linnakangas@i 360 : 12756 : GISTInitBuffer(ptr->buffer, (is_leaf) ? F_LEAF : 0);
3616 kgrittn@postgresql.o 361 : 12756 : ptr->page = BufferGetPage(ptr->buffer);
5561 heikki.linnakangas@i 362 : 12756 : ptr->block.blkno = BufferGetBlockNumber(ptr->buffer);
2910 teodor@sigaev.ru 363 : 12756 : PredicateLockPageSplit(rel,
364 : : BufferGetBlockNumber(buffer),
365 : : BufferGetBlockNumber(ptr->buffer));
366 : : }
367 : :
368 : : /*
369 : : * Now that we know which blocks the new pages go to, set up downlink
370 : : * tuples to point to them.
371 : : */
7102 bruce@momjian.us 372 [ + + ]: 37648 : for (ptr = dist; ptr; ptr = ptr->next)
373 : : {
5561 heikki.linnakangas@i 374 : 25113 : ItemPointerSetBlockNumber(&(ptr->itup->t_tid), ptr->block.blkno);
375 : 25113 : GistTupleSetValid(ptr->itup);
376 : : }
377 : :
378 : : /*
379 : : * If this is a root split, we construct the new root page with the
380 : : * downlinks here directly, instead of requiring the caller to insert
381 : : * them. Add the new root page to the list along with the child pages.
382 : : */
383 [ + + ]: 12535 : if (is_rootsplit)
384 : : {
385 : : IndexTuple *downlinks;
5453 bruce@momjian.us 386 : 178 : int ndownlinks = 0;
387 : : int i;
388 : :
5561 heikki.linnakangas@i 389 : 178 : rootpg.buffer = buffer;
3616 kgrittn@postgresql.o 390 : 178 : rootpg.page = PageGetTempPageCopySpecial(BufferGetPage(rootpg.buffer));
5561 heikki.linnakangas@i 391 : 178 : GistPageGetOpaque(rootpg.page)->flags = 0;
392 : :
393 : : /* Prepare a vector of all the downlinks */
394 [ + + ]: 537 : for (ptr = dist; ptr; ptr = ptr->next)
395 : 359 : ndownlinks++;
95 michael@paquier.xyz 396 :GNC 178 : downlinks = palloc_array(IndexTuple, ndownlinks);
5561 heikki.linnakangas@i 397 [ + + ]:CBC 537 : for (i = 0, ptr = dist; ptr; ptr = ptr->next)
398 : 359 : downlinks[i++] = ptr->itup;
399 : :
400 : 178 : rootpg.block.blkno = GIST_ROOT_BLKNO;
401 : 178 : rootpg.block.num = ndownlinks;
402 : 178 : rootpg.list = gistfillitupvec(downlinks, ndownlinks,
403 : : &(rootpg.lenlist));
404 : 178 : rootpg.itup = NULL;
405 : :
406 : 178 : rootpg.next = dist;
407 : 178 : dist = &rootpg;
408 : : }
409 : : else
410 : : {
411 : : /* Prepare split-info to be returned to caller */
412 [ + + ]: 37111 : for (ptr = dist; ptr; ptr = ptr->next)
413 : : {
95 michael@paquier.xyz 414 :GNC 24754 : GISTPageSplitInfo *si = palloc_object(GISTPageSplitInfo);
415 : :
5561 heikki.linnakangas@i 416 :CBC 24754 : si->buf = ptr->buffer;
417 : 24754 : si->downlink = ptr->itup;
418 : 24754 : *splitinfo = lappend(*splitinfo, si);
419 : : }
420 : : }
421 : :
422 : : /*
423 : : * Fill all pages. All the pages are new, ie. freshly allocated empty
424 : : * pages, or a temporary copy of the old page.
425 : : */
426 [ + + ]: 37826 : for (ptr = dist; ptr; ptr = ptr->next)
427 : : {
5453 bruce@momjian.us 428 : 25291 : char *data = (char *) (ptr->list);
429 : :
1299 drowley@postgresql.o 430 [ + + ]: 886099 : for (int i = 0; i < ptr->block.num; i++)
431 : : {
4959 heikki.linnakangas@i 432 : 860808 : IndexTuple thistup = (IndexTuple) data;
433 : :
139 peter@eisentraut.org 434 [ - + ]:GNC 860808 : if (PageAddItem(ptr->page, data, IndexTupleSize(thistup), i + FirstOffsetNumber, false, false) == InvalidOffsetNumber)
5302 heikki.linnakangas@i 435 [ # # ]:UBC 0 : elog(ERROR, "failed to add item to index page in \"%s\"", RelationGetRelationName(rel));
436 : :
437 : : /*
438 : : * If this is the first inserted/updated tuple, let the caller
439 : : * know which page it landed on.
440 : : */
4959 heikki.linnakangas@i 441 [ + + + + ]:CBC 860808 : if (newblkno && ItemPointerEquals(&thistup->t_tid, &(*itup)->t_tid))
442 : 387 : *newblkno = ptr->block.blkno;
443 : :
444 : 860808 : data += IndexTupleSize(thistup);
445 : : }
446 : :
447 : : /* Set up rightlinks */
5561 448 [ + + + + ]: 25291 : if (ptr->next && ptr->block.blkno != GIST_ROOT_BLKNO)
449 : 25156 : GistPageGetOpaque(ptr->page)->rightlink =
450 : 12578 : ptr->next->block.blkno;
451 : : else
452 : 12713 : GistPageGetOpaque(ptr->page)->rightlink = oldrlink;
453 : :
454 : : /*
455 : : * Mark the all but the right-most page with the follow-right
456 : : * flag. It will be cleared as soon as the downlink is inserted
457 : : * into the parent, but this ensures that if we error out before
458 : : * that, the index is still consistent. (in buffering build mode,
459 : : * any error will abort the index build anyway, so this is not
460 : : * needed.)
461 : : */
5302 462 [ + + + + : 25291 : if (ptr->next && !is_rootsplit && markfollowright)
+ + ]
5561 463 : 12013 : GistMarkFollowRight(ptr->page);
464 : : else
465 : 13278 : GistClearFollowRight(ptr->page);
466 : :
467 : : /*
468 : : * Copy the NSN of the original page to all pages. The
469 : : * F_FOLLOW_RIGHT flags ensure that scans will follow the
470 : : * rightlinks until the downlinks are inserted.
471 : : */
4805 472 : 25291 : GistPageSetNSN(ptr->page, oldnsn);
473 : : }
474 : :
475 : : /*
476 : : * gistXLogSplit() needs to WAL log a lot of pages, prepare WAL
477 : : * insertion for that. NB: The number of pages and data segments
478 : : * specified here must match the calculations in gistXLogSplit()!
479 : : */
2538 480 [ + + + + : 12535 : if (!is_build && RelationNeedsWAL(rel))
- + - - -
- ]
4133 481 : 1735 : XLogEnsureRecordSpace(npage, 1 + npage * 2);
482 : :
7249 teodor@sigaev.ru 483 : 12535 : START_CRIT_SECTION();
484 : :
485 : : /*
486 : : * Must mark buffers dirty before XLogInsert, even though we'll still
487 : : * be changing their opaque fields below.
488 : : */
7102 bruce@momjian.us 489 [ + + ]: 37826 : for (ptr = dist; ptr; ptr = ptr->next)
7289 tgl@sss.pgh.pa.us 490 : 25291 : MarkBufferDirty(ptr->buffer);
5561 heikki.linnakangas@i 491 [ + + ]: 12535 : if (BufferIsValid(leftchildbuf))
492 : 2627 : MarkBufferDirty(leftchildbuf);
493 : :
494 : : /*
495 : : * The first page in the chain was a temporary working copy meant to
496 : : * replace the old page. Copy it over the old page.
497 : : */
3616 kgrittn@postgresql.o 498 : 12535 : PageRestoreTempPage(dist->page, BufferGetPage(dist->buffer));
499 : 12535 : dist->page = BufferGetPage(dist->buffer);
500 : :
501 : : /*
502 : : * Write the WAL record.
503 : : *
504 : : * If we're building a new index, however, we don't WAL-log changes
505 : : * yet. The LSN-NSN interlock between parent and child requires that
506 : : * LSNs never move backwards, so set the LSNs to a value that's
507 : : * smaller than any real or fake unlogged LSN that might be generated
508 : : * later. (There can't be any concurrent scans during index build, so
509 : : * we don't need to be able to detect concurrent splits yet.)
510 : : */
2538 heikki.linnakangas@i 511 [ + + ]: 12535 : if (is_build)
512 : 10797 : recptr = GistBuildLSN;
513 : : else
514 : : {
515 [ + + - + : 1738 : if (RelationNeedsWAL(rel))
- - - - ]
516 : 1735 : recptr = gistXLogSplit(is_leaf,
517 : : dist, oldrlink, oldnsn, leftchildbuf,
518 : : markfollowright);
519 : : else
2 pg@bowt.ie 520 :GNC 3 : recptr = XLogGetFakeLSN(rel);
521 : : }
522 : :
7102 bruce@momjian.us 523 [ + + ]:CBC 37826 : for (ptr = dist; ptr; ptr = ptr->next)
5561 heikki.linnakangas@i 524 : 25291 : PageSetLSN(ptr->page, recptr);
525 : :
526 : : /*
527 : : * Return the new child buffers to the caller.
528 : : *
529 : : * If this was a root split, we've already inserted the downlink
530 : : * pointers, in the form of a new root page. Therefore we can release
531 : : * all the new buffers, and keep just the root page locked.
532 : : */
533 [ + + ]: 12535 : if (is_rootsplit)
534 : : {
535 [ + + ]: 537 : for (ptr = dist->next; ptr; ptr = ptr->next)
536 : 359 : UnlockReleaseBuffer(ptr->buffer);
537 : : }
538 : : }
539 : : else
540 : : {
541 : : /*
542 : : * Enough space. We always get here if ntup==0.
543 : : */
7249 teodor@sigaev.ru 544 : 803194 : START_CRIT_SECTION();
545 : :
546 : : /*
547 : : * Delete old tuple if any, then insert new tuple(s) if any. If
548 : : * possible, use the fast path of PageIndexTupleOverwrite.
549 : : */
5561 heikki.linnakangas@i 550 [ + + + - : 803194 : if (OffsetNumberIsValid(oldoffnum))
+ + ]
551 : : {
3474 tgl@sss.pgh.pa.us 552 [ + + ]: 367867 : if (ntup == 1)
553 : : {
554 : : /* One-for-one replacement, so use PageIndexTupleOverwrite */
139 peter@eisentraut.org 555 [ - + ]:GNC 358144 : if (!PageIndexTupleOverwrite(page, oldoffnum, *itup, IndexTupleSize(*itup)))
3474 tgl@sss.pgh.pa.us 556 [ # # ]:UBC 0 : elog(ERROR, "failed to add item to index page in \"%s\"",
557 : : RelationGetRelationName(rel));
558 : : }
559 : : else
560 : : {
561 : : /* Delete old, then append new tuple(s) to page */
3474 tgl@sss.pgh.pa.us 562 :CBC 9723 : PageIndexTupleDelete(page, oldoffnum);
563 : 9723 : gistfillbuffer(page, itup, ntup, InvalidOffsetNumber);
564 : : }
565 : : }
566 : : else
567 : : {
568 : : /* Just append new tuples at the end of the page */
569 : 435327 : gistfillbuffer(page, itup, ntup, InvalidOffsetNumber);
570 : : }
571 : :
5561 heikki.linnakangas@i 572 : 803194 : MarkBufferDirty(buffer);
573 : :
574 [ + + ]: 803194 : if (BufferIsValid(leftchildbuf))
575 : 9386 : MarkBufferDirty(leftchildbuf);
576 : :
2538 577 [ + + ]: 803194 : if (is_build)
578 : 555829 : recptr = GistBuildLSN;
579 : : else
580 : : {
581 [ + + + + : 247365 : if (RelationNeedsWAL(rel))
+ - + - ]
7479 bruce@momjian.us 582 : 247326 : {
2538 heikki.linnakangas@i 583 : 247326 : OffsetNumber ndeloffs = 0,
584 : : deloffs[1];
585 : :
586 [ + + + - : 247326 : if (OffsetNumberIsValid(oldoffnum))
+ + ]
587 : : {
588 : 96903 : deloffs[0] = oldoffnum;
589 : 96903 : ndeloffs = 1;
590 : : }
591 : :
592 : 247326 : recptr = gistXLogUpdate(buffer,
593 : : deloffs, ndeloffs, itup, ntup,
594 : : leftchildbuf);
595 : : }
596 : : else
2 pg@bowt.ie 597 :GNC 39 : recptr = XLogGetFakeLSN(rel);
598 : : }
2538 heikki.linnakangas@i 599 :CBC 803194 : PageSetLSN(page, recptr);
600 : :
4959 601 [ + + ]: 803194 : if (newblkno)
602 : 52185 : *newblkno = blkno;
603 : : }
604 : :
605 : : /*
606 : : * If we inserted the downlink for a child page, set NSN and clear
607 : : * F_FOLLOW_RIGHT flag on the left child, so that concurrent scans know to
608 : : * follow the rightlink if and only if they looked at the parent page
609 : : * before we inserted the downlink.
610 : : *
611 : : * Note that we do this *after* writing the WAL record. That means that
612 : : * the possible full page image in the WAL record does not include these
613 : : * changes, and they must be replayed even if the page is restored from
614 : : * the full page image. There's a chicken-and-egg problem: if we updated
615 : : * the child pages first, we wouldn't know the recptr of the WAL record
616 : : * we're about to write.
617 : : */
5561 618 [ + + ]: 815729 : if (BufferIsValid(leftchildbuf))
619 : : {
3616 kgrittn@postgresql.o 620 : 12013 : Page leftpg = BufferGetPage(leftchildbuf);
621 : :
4805 heikki.linnakangas@i 622 : 12013 : GistPageSetNSN(leftpg, recptr);
5561 623 : 12013 : GistClearFollowRight(leftpg);
624 : :
625 : 12013 : PageSetLSN(leftpg, recptr);
626 : : }
627 : :
628 [ - + ]: 815729 : END_CRIT_SECTION();
629 : :
630 : 815729 : return is_split;
631 : : }
632 : :
633 : : /*
634 : : * Workhorse routine for doing insertion into a GiST index. Note that
635 : : * this routine assumes it is invoked in a short-lived memory context,
636 : : * so it does not bother releasing palloc'd allocations.
637 : : */
638 : : void
2641 akorotkov@postgresql 639 : 427403 : gistdoinsert(Relation r, IndexTuple itup, Size freespace,
640 : : GISTSTATE *giststate, Relation heapRel, bool is_build)
641 : : {
642 : : ItemId iid;
643 : : IndexTuple idxtuple;
644 : : GISTInsertStack firststack;
645 : : GISTInsertStack *stack;
646 : : GISTInsertState state;
5561 heikki.linnakangas@i 647 : 427403 : bool xlocked = false;
648 : :
649 : 427403 : memset(&state, 0, sizeof(GISTInsertState));
650 : 427403 : state.freespace = freespace;
651 : 427403 : state.r = r;
2641 akorotkov@postgresql 652 : 427403 : state.heapRel = heapRel;
2538 heikki.linnakangas@i 653 : 427403 : state.is_build = is_build;
654 : :
655 : : /* Start from the root */
5561 656 : 427403 : firststack.blkno = GIST_ROOT_BLKNO;
45 alvherre@kurilemu.de 657 :GNC 427403 : firststack.lsn = InvalidXLogRecPtr;
2497 heikki.linnakangas@i 658 :CBC 427403 : firststack.retry_from_parent = false;
5561 659 : 427403 : firststack.parent = NULL;
5357 660 : 427403 : firststack.downlinkoffnum = InvalidOffsetNumber;
5561 661 : 427403 : state.stack = stack = &firststack;
662 : :
663 : : /*
664 : : * Walk down along the path of smallest penalty, updating the parent
665 : : * pointers with the key we're inserting as we go. If we crash in the
666 : : * middle, the tree is consistent, although the possible parent updates
667 : : * were a waste.
668 : : */
669 : : for (;;)
670 : : {
671 : : /*
672 : : * If we split an internal page while descending the tree, we have to
673 : : * retry at the parent. (Normally, the LSN-NSN interlock below would
674 : : * also catch this and cause us to retry. But LSNs are not updated
675 : : * during index build.)
676 : : */
2497 677 [ + + ]: 987235 : while (stack->retry_from_parent)
678 : : {
679 [ - + ]: 158 : if (xlocked)
2497 heikki.linnakangas@i 680 :UBC 0 : LockBuffer(stack->buffer, GIST_UNLOCK);
2497 heikki.linnakangas@i 681 :CBC 158 : xlocked = false;
682 : 158 : ReleaseBuffer(stack->buffer);
683 : 158 : state.stack = stack = stack->parent;
684 : : }
685 : :
129 alvherre@kurilemu.de 686 [ + + ]:GNC 987077 : if (!XLogRecPtrIsValid(stack->lsn))
5561 heikki.linnakangas@i 687 :CBC 987001 : stack->buffer = ReadBuffer(state.r, stack->blkno);
688 : :
689 : : /*
690 : : * Be optimistic and grab shared lock first. Swap it for an exclusive
691 : : * lock later if we need to update the page.
692 : : */
693 [ + + ]: 987077 : if (!xlocked)
694 : : {
695 : 987076 : LockBuffer(stack->buffer, GIST_SHARE);
696 : 987076 : gistcheckpage(state.r, stack->buffer);
697 : : }
698 : :
198 peter@eisentraut.org 699 :GNC 987077 : stack->page = BufferGetPage(stack->buffer);
2987 alvherre@alvh.no-ip. 700 :CBC 987077 : stack->lsn = xlocked ?
701 [ + + ]: 987077 : PageGetLSN(stack->page) : BufferGetLSNAtomic(stack->buffer);
129 alvherre@kurilemu.de 702 [ + + + + :GNC 987077 : Assert(!RelationNeedsWAL(state.r) || XLogRecPtrIsValid(stack->lsn));
+ + + - -
+ ]
703 : :
704 : : /*
705 : : * If this page was split but the downlink was never inserted to the
706 : : * parent because the inserting backend crashed before doing that, fix
707 : : * that now.
708 : : */
5561 heikki.linnakangas@i 709 [ - + ]:CBC 987077 : if (GistFollowRight(stack->page))
710 : : {
5561 heikki.linnakangas@i 711 [ # # ]:UBC 0 : if (!xlocked)
712 : : {
713 : 0 : LockBuffer(stack->buffer, GIST_UNLOCK);
714 : 0 : LockBuffer(stack->buffer, GIST_EXCLUSIVE);
715 : 0 : xlocked = true;
716 : : /* someone might've completed the split when we unlocked */
717 [ # # ]: 0 : if (!GistFollowRight(stack->page))
718 : 0 : continue;
719 : : }
720 : 0 : gistfixsplit(&state, giststate);
721 : :
722 : 0 : UnlockReleaseBuffer(stack->buffer);
723 : 0 : xlocked = false;
724 : 0 : state.stack = stack = stack->parent;
725 : 0 : continue;
726 : : }
727 : :
2426 heikki.linnakangas@i 728 [ + + + - ]:CBC 1546686 : if ((stack->blkno != GIST_ROOT_BLKNO &&
729 : 559609 : stack->parent->lsn < GistPageGetNSN(stack->page)) ||
730 [ - + ]: 987077 : GistPageIsDeleted(stack->page))
731 : : {
732 : : /*
733 : : * Concurrent split or page deletion detected. There's no
734 : : * guarantee that the downlink for this page is consistent with
735 : : * the tuple we're inserting anymore, so go back to parent and
736 : : * rechoose the best child.
737 : : */
5561 heikki.linnakangas@i 738 :UBC 0 : UnlockReleaseBuffer(stack->buffer);
739 : 0 : xlocked = false;
740 : 0 : state.stack = stack = stack->parent;
7566 teodor@sigaev.ru 741 : 0 : continue;
742 : : }
743 : :
5561 heikki.linnakangas@i 744 [ + + ]:CBC 987077 : if (!GistPageIsLeaf(stack->page))
745 : : {
746 : : /*
747 : : * This is an internal page so continue to walk down the tree.
748 : : * Find the child node that has the minimum insertion penalty.
749 : : */
750 : : BlockNumber childblkno;
751 : : IndexTuple newtup;
752 : : GISTInsertStack *item;
753 : : OffsetNumber downlinkoffnum;
754 : :
5357 755 : 559674 : downlinkoffnum = gistchoose(state.r, stack->page, itup, giststate);
756 : 559674 : iid = PageGetItemId(stack->page, downlinkoffnum);
5561 757 : 559674 : idxtuple = (IndexTuple) PageGetItem(stack->page, iid);
758 : 559674 : childblkno = ItemPointerGetBlockNumber(&(idxtuple->t_tid));
759 : :
760 : : /*
761 : : * Check that it's not a leftover invalid tuple from pre-9.1
762 : : */
763 [ - + ]: 559674 : if (GistTupleIsInvalid(idxtuple))
5561 heikki.linnakangas@i 764 [ # # ]:UBC 0 : ereport(ERROR,
765 : : (errmsg("index \"%s\" contains an inner tuple marked as invalid",
766 : : RelationGetRelationName(r)),
767 : : errdetail("This is caused by an incomplete page split at crash recovery before upgrading to PostgreSQL 9.1."),
768 : : errhint("Please REINDEX it.")));
769 : :
770 : : /*
771 : : * Check that the key representing the target child node is
772 : : * consistent with the key we're inserting. Update it if it's not.
773 : : */
5561 heikki.linnakangas@i 774 :CBC 559674 : newtup = gistgetadjusted(state.r, idxtuple, itup, giststate);
775 [ + + ]: 559674 : if (newtup)
776 : : {
777 : : /*
778 : : * Swap shared lock for an exclusive one. Beware, the page may
779 : : * change while we unlock/lock the page...
780 : : */
781 [ + - ]: 323747 : if (!xlocked)
782 : : {
783 : 323747 : LockBuffer(stack->buffer, GIST_UNLOCK);
784 : 323747 : LockBuffer(stack->buffer, GIST_EXCLUSIVE);
785 : 323747 : xlocked = true;
198 peter@eisentraut.org 786 :GNC 323747 : stack->page = BufferGetPage(stack->buffer);
787 : :
4825 alvherre@alvh.no-ip. 788 [ - + ]:CBC 323747 : if (PageGetLSN(stack->page) != stack->lsn)
789 : : {
790 : : /* the page was changed while we unlocked it, retry */
5561 heikki.linnakangas@i 791 :UBC 0 : continue;
792 : : }
793 : : }
794 : :
795 : : /*
796 : : * Update the tuple.
797 : : *
798 : : * We still hold the lock after gistinserttuple(), but it
799 : : * might have to split the page to make the updated tuple fit.
800 : : * In that case the updated tuple might migrate to the other
801 : : * half of the split, so we have to go back to the parent and
802 : : * descend back to the half that's a better fit for the new
803 : : * tuple.
804 : : */
5056 heikki.linnakangas@i 805 [ + + ]:CBC 323747 : if (gistinserttuple(&state, stack, giststate, newtup,
806 : : downlinkoffnum))
807 : : {
808 : : /*
809 : : * If this was a root split, the root page continues to be
810 : : * the parent and the updated tuple went to one of the
811 : : * child pages, so we just need to retry from the root
812 : : * page.
813 : : */
5544 814 [ + + ]: 76 : if (stack->blkno != GIST_ROOT_BLKNO)
815 : : {
816 : 75 : UnlockReleaseBuffer(stack->buffer);
817 : 75 : xlocked = false;
818 : 75 : state.stack = stack = stack->parent;
819 : : }
5561 820 : 76 : continue;
821 : : }
822 : : }
823 : 559598 : LockBuffer(stack->buffer, GIST_UNLOCK);
824 : 559598 : xlocked = false;
825 : :
826 : : /* descend to the chosen child */
95 michael@paquier.xyz 827 :GNC 559598 : item = palloc0_object(GISTInsertStack);
5561 heikki.linnakangas@i 828 :CBC 559598 : item->blkno = childblkno;
829 : 559598 : item->parent = stack;
5357 830 : 559598 : item->downlinkoffnum = downlinkoffnum;
5561 831 : 559598 : state.stack = stack = item;
832 : : }
833 : : else
834 : : {
835 : : /*
836 : : * Leaf page. Insert the new key. We've already updated all the
837 : : * parents on the way down, but we might have to split the page if
838 : : * it doesn't fit. gistinserttuple() will take care of that.
839 : : */
840 : :
841 : : /*
842 : : * Swap shared lock for an exclusive one. Be careful, the page may
843 : : * change while we unlock/lock the page...
844 : : */
845 [ + - ]: 427403 : if (!xlocked)
846 : : {
847 : 427403 : LockBuffer(stack->buffer, GIST_UNLOCK);
848 : 427403 : LockBuffer(stack->buffer, GIST_EXCLUSIVE);
849 : 427403 : xlocked = true;
198 peter@eisentraut.org 850 :GNC 427403 : stack->page = BufferGetPage(stack->buffer);
5561 heikki.linnakangas@i 851 :CBC 427403 : stack->lsn = PageGetLSN(stack->page);
852 : :
853 [ + + ]: 427403 : if (stack->blkno == GIST_ROOT_BLKNO)
854 : : {
855 : : /*
856 : : * the only page that can become inner instead of leaf is
857 : : * the root page, so for root we should recheck it
858 : : */
859 [ - + ]: 19451 : if (!GistPageIsLeaf(stack->page))
860 : : {
861 : : /*
862 : : * very rare situation: during unlock/lock index with
863 : : * number of pages = 1 was increased
864 : : */
5561 heikki.linnakangas@i 865 :UBC 0 : LockBuffer(stack->buffer, GIST_UNLOCK);
866 : 0 : xlocked = false;
867 : 0 : continue;
868 : : }
869 : :
870 : : /*
871 : : * we don't need to check root split, because checking
872 : : * leaf/inner is enough to recognize split for root
873 : : */
874 : : }
2426 heikki.linnakangas@i 875 [ + - + - ]:CBC 815904 : else if ((GistFollowRight(stack->page) ||
1880 876 : 407952 : stack->parent->lsn < GistPageGetNSN(stack->page)) ||
2426 877 [ - + ]: 407952 : GistPageIsDeleted(stack->page))
878 : : {
879 : : /*
880 : : * The page was split or deleted while we momentarily
881 : : * unlocked the page. Go back to parent.
882 : : */
5561 heikki.linnakangas@i 883 :UBC 0 : UnlockReleaseBuffer(stack->buffer);
884 : 0 : xlocked = false;
885 : 0 : state.stack = stack = stack->parent;
886 : 0 : continue;
887 : : }
888 : : }
889 : :
890 : : /* now state.stack->(page, buffer and blkno) points to leaf page */
891 : :
5056 heikki.linnakangas@i 892 :CBC 427403 : gistinserttuple(&state, stack, giststate, itup,
893 : : InvalidOffsetNumber);
5561 894 : 427397 : LockBuffer(stack->buffer, GIST_UNLOCK);
895 : :
896 : : /* Release any pins we might still hold before exiting */
897 [ + + ]: 1414153 : for (; stack; stack = stack->parent)
898 : 986756 : ReleaseBuffer(stack->buffer);
7579 teodor@sigaev.ru 899 : 427397 : break;
900 : : }
901 : : }
7566 902 : 427397 : }
903 : :
904 : : /*
905 : : * Traverse the tree to find path from root page to specified "child" block.
906 : : *
907 : : * returns a new insertion stack, starting from the parent of "child", up
908 : : * to the root. *downlinkoffnum is set to the offset of the downlink in the
909 : : * direct parent of child.
910 : : *
911 : : * To prevent deadlocks, this should lock only one page at a time.
912 : : */
913 : : static GISTInsertStack *
5357 heikki.linnakangas@i 914 :UBC 0 : gistFindPath(Relation r, BlockNumber child, OffsetNumber *downlinkoffnum)
915 : : {
916 : : Page page;
917 : : Buffer buffer;
918 : : OffsetNumber i,
919 : : maxoff;
920 : : ItemId iid;
921 : : IndexTuple idxtuple;
922 : : List *fifo;
923 : : GISTInsertStack *top,
924 : : *ptr;
925 : : BlockNumber blkno;
926 : :
95 michael@paquier.xyz 927 :UNC 0 : top = palloc0_object(GISTInsertStack);
7566 teodor@sigaev.ru 928 :UBC 0 : top->blkno = GIST_ROOT_BLKNO;
5357 heikki.linnakangas@i 929 : 0 : top->downlinkoffnum = InvalidOffsetNumber;
930 : :
931 : 0 : fifo = list_make1(top);
932 [ # # ]: 0 : while (fifo != NIL)
933 : : {
934 : : /* Get next page to visit */
935 : 0 : top = linitial(fifo);
936 : 0 : fifo = list_delete_first(fifo);
937 : :
7290 tgl@sss.pgh.pa.us 938 : 0 : buffer = ReadBuffer(r, top->blkno);
939 : 0 : LockBuffer(buffer, GIST_SHARE);
7434 940 : 0 : gistcheckpage(r, buffer);
198 peter@eisentraut.org 941 :UNC 0 : page = BufferGetPage(buffer);
942 : :
7479 bruce@momjian.us 943 [ # # ]:UBC 0 : if (GistPageIsLeaf(page))
944 : : {
945 : : /*
946 : : * Because we scan the index top-down, all the rest of the pages
947 : : * in the queue must be leaf pages as well.
948 : : */
7289 tgl@sss.pgh.pa.us 949 : 0 : UnlockReleaseBuffer(buffer);
5357 heikki.linnakangas@i 950 : 0 : break;
951 : : }
952 : :
953 : : /* currently, internal pages are never deleted */
2426 954 [ # # ]: 0 : Assert(!GistPageIsDeleted(page));
955 : :
2987 alvherre@alvh.no-ip. 956 : 0 : top->lsn = BufferGetLSNAtomic(buffer);
957 : :
958 : : /*
959 : : * If F_FOLLOW_RIGHT is set, the page to the right doesn't have a
960 : : * downlink. This should not normally happen..
961 : : */
5561 heikki.linnakangas@i 962 [ # # ]: 0 : if (GistFollowRight(page))
963 [ # # ]: 0 : elog(ERROR, "concurrent GiST page split was incomplete");
964 : :
4805 965 [ # # # # ]: 0 : if (top->parent && top->parent->lsn < GistPageGetNSN(page) &&
7479 bruce@momjian.us 966 [ # # ]: 0 : GistPageGetOpaque(page)->rightlink != InvalidBlockNumber /* sanity check */ )
967 : : {
968 : : /*
969 : : * Page was split while we looked elsewhere. We didn't see the
970 : : * downlink to the right page when we scanned the parent, so add
971 : : * it to the queue now.
972 : : *
973 : : * Put the right page ahead of the queue, so that we visit it
974 : : * next. That's important, because if this is the lowest internal
975 : : * level, just above leaves, we might already have queued up some
976 : : * leaf pages, and we assume that there can't be any non-leaf
977 : : * pages behind leaf pages.
978 : : */
95 michael@paquier.xyz 979 :UNC 0 : ptr = palloc0_object(GISTInsertStack);
7566 teodor@sigaev.ru 980 :UBC 0 : ptr->blkno = GistPageGetOpaque(page)->rightlink;
5357 heikki.linnakangas@i 981 : 0 : ptr->downlinkoffnum = InvalidOffsetNumber;
982 : 0 : ptr->parent = top->parent;
983 : :
984 : 0 : fifo = lcons(ptr, fifo);
985 : : }
986 : :
7566 teodor@sigaev.ru 987 : 0 : maxoff = PageGetMaxOffsetNumber(page);
988 : :
7479 bruce@momjian.us 989 [ # # ]: 0 : for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i))
990 : : {
7566 teodor@sigaev.ru 991 : 0 : iid = PageGetItemId(page, i);
992 : 0 : idxtuple = (IndexTuple) PageGetItem(page, iid);
993 : 0 : blkno = ItemPointerGetBlockNumber(&(idxtuple->t_tid));
7479 bruce@momjian.us 994 [ # # ]: 0 : if (blkno == child)
995 : : {
996 : : /* Found it! */
7289 tgl@sss.pgh.pa.us 997 : 0 : UnlockReleaseBuffer(buffer);
5357 heikki.linnakangas@i 998 : 0 : *downlinkoffnum = i;
7566 teodor@sigaev.ru 999 : 0 : return top;
1000 : : }
1001 : : else
1002 : : {
1003 : : /* Append this child to the list of pages to visit later */
95 michael@paquier.xyz 1004 :UNC 0 : ptr = palloc0_object(GISTInsertStack);
7566 teodor@sigaev.ru 1005 :UBC 0 : ptr->blkno = blkno;
5357 heikki.linnakangas@i 1006 : 0 : ptr->downlinkoffnum = i;
7566 teodor@sigaev.ru 1007 : 0 : ptr->parent = top;
1008 : :
5357 heikki.linnakangas@i 1009 : 0 : fifo = lappend(fifo, ptr);
1010 : : }
1011 : : }
1012 : :
7289 tgl@sss.pgh.pa.us 1013 : 0 : UnlockReleaseBuffer(buffer);
1014 : : }
1015 : :
5357 heikki.linnakangas@i 1016 [ # # ]: 0 : elog(ERROR, "failed to re-find parent of a page in index \"%s\", block %u",
1017 : : RelationGetRelationName(r), child);
1018 : : return NULL; /* keep compiler quiet */
1019 : : }
1020 : :
1021 : : /*
1022 : : * Updates the stack so that child->parent is the correct parent of the
1023 : : * child. child->parent must be exclusively locked on entry, and will
1024 : : * remain so at exit, but it might not be the same page anymore.
1025 : : */
1026 : : static void
901 heikki.linnakangas@i 1027 :CBC 12013 : gistFindCorrectParent(Relation r, GISTInsertStack *child, bool is_build)
1028 : : {
7479 bruce@momjian.us 1029 : 12013 : GISTInsertStack *parent = child->parent;
1030 : : ItemId iid;
1031 : : IndexTuple idxtuple;
1032 : : OffsetNumber maxoff;
1033 : : GISTInsertStack *ptr;
1034 : :
7434 tgl@sss.pgh.pa.us 1035 : 12013 : gistcheckpage(r, parent->buffer);
198 peter@eisentraut.org 1036 :GNC 12013 : parent->page = BufferGetPage(parent->buffer);
901 heikki.linnakangas@i 1037 :CBC 12013 : maxoff = PageGetMaxOffsetNumber(parent->page);
1038 : :
1039 : : /* Check if the downlink is still where it was before */
1040 [ + + + - ]: 12013 : if (child->downlinkoffnum != InvalidOffsetNumber && child->downlinkoffnum <= maxoff)
1041 : : {
1042 : 12005 : iid = PageGetItemId(parent->page, child->downlinkoffnum);
1043 : 12005 : idxtuple = (IndexTuple) PageGetItem(parent->page, iid);
1044 [ + - ]: 12005 : if (ItemPointerGetBlockNumber(&(idxtuple->t_tid)) == child->blkno)
1045 : 12005 : return; /* still there */
1046 : : }
1047 : :
1048 : : /*
1049 : : * The page has changed since we looked. During normal operation, every
1050 : : * update of a page changes its LSN, so the LSN we memorized should have
1051 : : * changed too.
1052 : : *
1053 : : * During index build, however, we don't WAL-log the changes until we have
1054 : : * built the index, so the LSN doesn't change. There is no concurrent
1055 : : * activity during index build, but we might have changed the parent
1056 : : * ourselves.
1057 : : *
1058 : : * We will also get here if child->downlinkoffnum is invalid. That happens
1059 : : * if 'parent' had been updated by an earlier call to this function on its
1060 : : * grandchild, which had to move right.
1061 : : */
345 1062 [ - + + - : 8 : Assert(parent->lsn != PageGetLSN(parent->page) || is_build ||
- - ]
1063 : : child->downlinkoffnum == InvalidOffsetNumber);
1064 : :
1065 : : /*
1066 : : * Scan the page to re-find the downlink. If the page was split, it might
1067 : : * have moved to a different page, so follow the right links until we find
1068 : : * it.
1069 : : */
1070 : : while (true)
901 1071 : 6 : {
1072 : : OffsetNumber i;
1073 : :
1074 : 14 : maxoff = PageGetMaxOffsetNumber(parent->page);
1075 [ + + ]: 63 : for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i))
1076 : : {
1077 : 57 : iid = PageGetItemId(parent->page, i);
1078 : 57 : idxtuple = (IndexTuple) PageGetItem(parent->page, iid);
1079 [ + + ]: 57 : if (ItemPointerGetBlockNumber(&(idxtuple->t_tid)) == child->blkno)
1080 : : {
1081 : : /* yes!!, found */
1082 : 8 : child->downlinkoffnum = i;
1083 : 8 : return;
1084 : : }
1085 : : }
1086 : :
1087 : 6 : parent->blkno = GistPageGetOpaque(parent->page)->rightlink;
1088 : 6 : parent->downlinkoffnum = InvalidOffsetNumber;
1089 : 6 : UnlockReleaseBuffer(parent->buffer);
1090 [ - + ]: 6 : if (parent->blkno == InvalidBlockNumber)
1091 : : {
1092 : : /*
1093 : : * End of chain and still didn't find parent. It's a very-very
1094 : : * rare situation when the root was split.
1095 : : */
901 heikki.linnakangas@i 1096 :UBC 0 : break;
1097 : : }
901 heikki.linnakangas@i 1098 :CBC 6 : parent->buffer = ReadBuffer(r, parent->blkno);
1099 : 6 : LockBuffer(parent->buffer, GIST_EXCLUSIVE);
1100 : 6 : gistcheckpage(r, parent->buffer);
198 peter@eisentraut.org 1101 :GNC 6 : parent->page = BufferGetPage(parent->buffer);
1102 : : }
1103 : :
1104 : : /*
1105 : : * awful!!, we need search tree to find parent ... , but before we should
1106 : : * release all old parent
1107 : : */
1108 : :
901 heikki.linnakangas@i 1109 :UBC 0 : ptr = child->parent->parent; /* child->parent already released above */
1110 [ # # ]: 0 : while (ptr)
1111 : : {
1112 : 0 : ReleaseBuffer(ptr->buffer);
1113 : 0 : ptr = ptr->parent;
1114 : : }
1115 : :
1116 : : /* ok, find new path */
1117 : 0 : ptr = parent = gistFindPath(r, child->blkno, &child->downlinkoffnum);
1118 : :
1119 : : /* read all buffers as expected by caller */
1120 : : /* note we don't lock them or gistcheckpage them here! */
1121 [ # # ]: 0 : while (ptr)
1122 : : {
1123 : 0 : ptr->buffer = ReadBuffer(r, ptr->blkno);
198 peter@eisentraut.org 1124 :UNC 0 : ptr->page = BufferGetPage(ptr->buffer);
901 heikki.linnakangas@i 1125 :UBC 0 : ptr = ptr->parent;
1126 : : }
1127 : :
1128 : : /* install new chain of parents to stack */
1129 : 0 : child->parent = parent;
1130 : :
1131 : : /* make recursive call to normal processing */
1132 : 0 : LockBuffer(child->parent->buffer, GIST_EXCLUSIVE);
1133 : 0 : gistFindCorrectParent(r, child, is_build);
1134 : : }
1135 : :
1136 : : /*
1137 : : * Form a downlink pointer for the page in 'buf'.
1138 : : */
1139 : : static IndexTuple
5561 1140 : 0 : gistformdownlink(Relation rel, Buffer buf, GISTSTATE *giststate,
1141 : : GISTInsertStack *stack, bool is_build)
1142 : : {
3616 kgrittn@postgresql.o 1143 : 0 : Page page = BufferGetPage(buf);
1144 : : OffsetNumber maxoff;
1145 : : OffsetNumber offset;
5453 bruce@momjian.us 1146 : 0 : IndexTuple downlink = NULL;
1147 : :
5561 heikki.linnakangas@i 1148 : 0 : maxoff = PageGetMaxOffsetNumber(page);
1149 [ # # ]: 0 : for (offset = FirstOffsetNumber; offset <= maxoff; offset = OffsetNumberNext(offset))
1150 : : {
1151 : : IndexTuple ituple = (IndexTuple)
1031 tgl@sss.pgh.pa.us 1152 : 0 : PageGetItem(page, PageGetItemId(page, offset));
1153 : :
5561 heikki.linnakangas@i 1154 [ # # ]: 0 : if (downlink == NULL)
1155 : 0 : downlink = CopyIndexTuple(ituple);
1156 : : else
1157 : : {
1158 : : IndexTuple newdownlink;
1159 : :
1160 : 0 : newdownlink = gistgetadjusted(rel, downlink, ituple,
1161 : : giststate);
1162 [ # # ]: 0 : if (newdownlink)
1163 : 0 : downlink = newdownlink;
1164 : : }
1165 : : }
1166 : :
1167 : : /*
1168 : : * If the page is completely empty, we can't form a meaningful downlink
1169 : : * for it. But we have to insert a downlink for the page. Any key will do,
1170 : : * as long as its consistent with the downlink of parent page, so that we
1171 : : * can legally insert it to the parent. A minimal one that matches as few
1172 : : * scans as possible would be best, to keep scans from doing useless work,
1173 : : * but we don't know how to construct that. So we just use the downlink of
1174 : : * the original page that was split - that's as far from optimal as it can
1175 : : * get but will do..
1176 : : */
1177 [ # # ]: 0 : if (!downlink)
1178 : : {
1179 : : ItemId iid;
1180 : :
1181 : 0 : LockBuffer(stack->parent->buffer, GIST_EXCLUSIVE);
901 1182 : 0 : gistFindCorrectParent(rel, stack, is_build);
5357 1183 : 0 : iid = PageGetItemId(stack->parent->page, stack->downlinkoffnum);
5561 1184 : 0 : downlink = (IndexTuple) PageGetItem(stack->parent->page, iid);
1185 : 0 : downlink = CopyIndexTuple(downlink);
1186 : 0 : LockBuffer(stack->parent->buffer, GIST_UNLOCK);
1187 : : }
1188 : :
1189 : 0 : ItemPointerSetBlockNumber(&(downlink->t_tid), BufferGetBlockNumber(buf));
1190 : 0 : GistTupleSetValid(downlink);
1191 : :
1192 : 0 : return downlink;
1193 : : }
1194 : :
1195 : :
1196 : : /*
1197 : : * Complete the incomplete split of state->stack->page.
1198 : : */
1199 : : static void
1200 : 0 : gistfixsplit(GISTInsertState *state, GISTSTATE *giststate)
1201 : : {
1202 : 0 : GISTInsertStack *stack = state->stack;
1203 : : Buffer buf;
1204 : : Page page;
1205 : 0 : List *splitinfo = NIL;
1206 : :
1927 peter@eisentraut.org 1207 [ # # ]: 0 : ereport(LOG,
1208 : : (errmsg("fixing incomplete split in index \"%s\", block %u",
1209 : : RelationGetRelationName(state->r), stack->blkno)));
1210 : :
5561 heikki.linnakangas@i 1211 [ # # ]: 0 : Assert(GistFollowRight(stack->page));
5357 1212 [ # # # # : 0 : Assert(OffsetNumberIsValid(stack->downlinkoffnum));
# # ]
1213 : :
5561 1214 : 0 : buf = stack->buffer;
1215 : :
1216 : : /*
1217 : : * Read the chain of split pages, following the rightlinks. Construct a
1218 : : * downlink tuple for each page.
1219 : : */
1220 : : for (;;)
1221 : 0 : {
95 michael@paquier.xyz 1222 :UNC 0 : GISTPageSplitInfo *si = palloc_object(GISTPageSplitInfo);
1223 : : IndexTuple downlink;
1224 : :
3616 kgrittn@postgresql.o 1225 :UBC 0 : page = BufferGetPage(buf);
1226 : :
1227 : : /* Form the new downlink tuples to insert to parent */
901 heikki.linnakangas@i 1228 : 0 : downlink = gistformdownlink(state->r, buf, giststate, stack, state->is_build);
1229 : :
5561 1230 : 0 : si->buf = buf;
1231 : 0 : si->downlink = downlink;
1232 : :
1233 : 0 : splitinfo = lappend(splitinfo, si);
1234 : :
1235 [ # # ]: 0 : if (GistFollowRight(page))
1236 : : {
1237 : : /* lock next page */
1238 : 0 : buf = ReadBuffer(state->r, GistPageGetOpaque(page)->rightlink);
1239 : 0 : LockBuffer(buf, GIST_EXCLUSIVE);
1240 : : }
1241 : : else
1242 : 0 : break;
1243 : : }
1244 : :
1245 : : /* Insert the downlinks */
5056 1246 : 0 : gistfinishsplit(state, stack, giststate, splitinfo, false);
5561 1247 : 0 : }
1248 : :
1249 : : /*
1250 : : * Insert or replace a tuple in stack->buffer. If 'oldoffnum' is valid, the
1251 : : * tuple at 'oldoffnum' is replaced, otherwise the tuple is inserted as new.
1252 : : * 'stack' represents the path from the root to the page being updated.
1253 : : *
1254 : : * The caller must hold an exclusive lock on stack->buffer. The lock is still
1255 : : * held on return, but the page might not contain the inserted tuple if the
1256 : : * page was split. The function returns true if the page was split, false
1257 : : * otherwise.
1258 : : */
1259 : : static bool
5056 heikki.linnakangas@i 1260 :CBC 751150 : gistinserttuple(GISTInsertState *state, GISTInsertStack *stack,
1261 : : GISTSTATE *giststate, IndexTuple tuple, OffsetNumber oldoffnum)
1262 : : {
1263 : 751150 : return gistinserttuples(state, stack, giststate, &tuple, 1, oldoffnum,
1264 : : InvalidBuffer, InvalidBuffer, false, false);
1265 : : }
1266 : :
1267 : : /* ----------------
1268 : : * An extended workhorse version of gistinserttuple(). This version allows
1269 : : * inserting multiple tuples, or replacing a single tuple with multiple tuples.
1270 : : * This is used to recursively update the downlinks in the parent when a page
1271 : : * is split.
1272 : : *
1273 : : * If leftchild and rightchild are valid, we're inserting/replacing the
1274 : : * downlink for rightchild, and leftchild is its left sibling. We clear the
1275 : : * F_FOLLOW_RIGHT flag and update NSN on leftchild, atomically with the
1276 : : * insertion of the downlink.
1277 : : *
1278 : : * To avoid holding locks for longer than necessary, when recursing up the
1279 : : * tree to update the parents, the locking is a bit peculiar here. On entry,
1280 : : * the caller must hold an exclusive lock on stack->buffer, as well as
1281 : : * leftchild and rightchild if given. On return:
1282 : : *
1283 : : * - Lock on stack->buffer is released, if 'unlockbuf' is true. The page is
1284 : : * always kept pinned, however.
1285 : : * - Lock on 'leftchild' is released, if 'unlockleftchild' is true. The page
1286 : : * is kept pinned.
1287 : : * - Lock and pin on 'rightchild' are always released.
1288 : : *
1289 : : * Returns 'true' if the page had to be split. Note that if the page was
1290 : : * split, the inserted/updated tuples might've been inserted to a right
1291 : : * sibling of stack->buffer instead of stack->buffer itself.
1292 : : */
1293 : : static bool
5561 1294 : 763163 : gistinserttuples(GISTInsertState *state, GISTInsertStack *stack,
1295 : : GISTSTATE *giststate,
1296 : : IndexTuple *tuples, int ntup, OffsetNumber oldoffnum,
1297 : : Buffer leftchild, Buffer rightchild,
1298 : : bool unlockbuf, bool unlockleftchild)
1299 : : {
1300 : : List *splitinfo;
1301 : : bool is_split;
1302 : :
1303 : : /*
1304 : : * Check for any rw conflicts (in serializable isolation level) just
1305 : : * before we intend to modify the page
1306 : : */
2238 tmunro@postgresql.or 1307 : 763163 : CheckForSerializableConflictIn(state->r, NULL, BufferGetBlockNumber(stack->buffer));
1308 : :
1309 : : /* Insert the tuple(s) to the page, splitting the page if necessary */
5302 heikki.linnakangas@i 1310 : 763157 : is_split = gistplacetopage(state->r, state->freespace, giststate,
1311 : : stack->buffer,
1312 : : tuples, ntup,
1313 : : oldoffnum, NULL,
1314 : : leftchild,
1315 : : &splitinfo,
1316 : : true,
1317 : : state->heapRel,
2538 1318 : 763157 : state->is_build);
1319 : :
1320 : : /*
1321 : : * Before recursing up in case the page was split, release locks on the
1322 : : * child pages. We don't need to keep them locked when updating the
1323 : : * parent.
1324 : : */
5056 1325 [ + + ]: 763157 : if (BufferIsValid(rightchild))
1326 : 12013 : UnlockReleaseBuffer(rightchild);
1327 [ + + + + ]: 763157 : if (BufferIsValid(leftchild) && unlockleftchild)
1328 : 2566 : LockBuffer(leftchild, GIST_UNLOCK);
1329 : :
1330 : : /*
1331 : : * If we had to split, insert/update the downlinks in the parent. If the
1332 : : * caller requested us to release the lock on stack->buffer, tell
1333 : : * gistfinishsplit() to do that as soon as it's safe to do so. If we
1334 : : * didn't have to split, release it ourselves.
1335 : : */
5561 1336 [ + + ]: 763157 : if (splitinfo)
5056 1337 : 11973 : gistfinishsplit(state, stack, giststate, splitinfo, unlockbuf);
1338 [ + + ]: 751184 : else if (unlockbuf)
1339 : 9407 : LockBuffer(stack->buffer, GIST_UNLOCK);
1340 : :
5561 1341 : 763157 : return is_split;
1342 : : }
1343 : :
1344 : : /*
1345 : : * Finish an incomplete split by inserting/updating the downlinks in parent
1346 : : * page. 'splitinfo' contains all the child pages involved in the split,
1347 : : * from left-to-right.
1348 : : *
1349 : : * On entry, the caller must hold a lock on stack->buffer and all the child
1350 : : * pages in 'splitinfo'. If 'unlockbuf' is true, the lock on stack->buffer is
1351 : : * released on return. The child pages are always unlocked and unpinned.
1352 : : */
1353 : : static void
1354 : 11973 : gistfinishsplit(GISTInsertState *state, GISTInsertStack *stack,
1355 : : GISTSTATE *giststate, List *splitinfo, bool unlockbuf)
1356 : : {
1357 : : GISTPageSplitInfo *right;
1358 : : GISTPageSplitInfo *left;
1359 : : IndexTuple tuples[2];
1360 : :
1361 : : /* A split always contains at least two halves */
1362 [ - + ]: 11973 : Assert(list_length(splitinfo) >= 2);
1363 : :
1364 : : /*
1365 : : * We need to insert downlinks for each new page, and update the downlink
1366 : : * for the original (leftmost) page in the split. Begin at the rightmost
1367 : : * page, inserting one downlink at a time until there's only two pages
1368 : : * left. Finally insert the downlink for the last new page and update the
1369 : : * downlink for the original page as one operation.
1370 : : */
1371 : 11973 : LockBuffer(stack->parent->buffer, GIST_EXCLUSIVE);
1372 : :
1373 : : /*
1374 : : * Insert downlinks for the siblings from right to left, until there are
1375 : : * only two siblings left.
1376 : : */
2433 tgl@sss.pgh.pa.us 1377 [ + + ]: 12013 : for (int pos = list_length(splitinfo) - 1; pos > 1; pos--)
1378 : : {
1379 : 40 : right = (GISTPageSplitInfo *) list_nth(splitinfo, pos);
1380 : 40 : left = (GISTPageSplitInfo *) list_nth(splitinfo, pos - 1);
1381 : :
901 heikki.linnakangas@i 1382 : 40 : gistFindCorrectParent(state->r, stack, state->is_build);
5561 1383 [ + + ]: 40 : if (gistinserttuples(state, stack->parent, giststate,
1384 : : &right->downlink, 1,
1385 : : InvalidOffsetNumber,
1386 : : left->buf, right->buf, false, false))
1387 : : {
1388 : : /*
1389 : : * If the parent page was split, the existing downlink might have
1390 : : * moved.
1391 : : */
2284 1392 : 2 : stack->downlinkoffnum = InvalidOffsetNumber;
1393 : : }
1394 : : /* gistinserttuples() released the lock on right->buf. */
1395 : : }
1396 : :
2433 tgl@sss.pgh.pa.us 1397 : 11973 : right = (GISTPageSplitInfo *) lsecond(splitinfo);
1398 : 11973 : left = (GISTPageSplitInfo *) linitial(splitinfo);
1399 : :
1400 : : /*
1401 : : * Finally insert downlink for the remaining right page and update the
1402 : : * downlink for the original page to not contain the tuples that were
1403 : : * moved to the new pages.
1404 : : */
5561 heikki.linnakangas@i 1405 : 11973 : tuples[0] = left->downlink;
1406 : 11973 : tuples[1] = right->downlink;
901 1407 : 11973 : gistFindCorrectParent(state->r, stack, state->is_build);
1408 : 11973 : (void) gistinserttuples(state, stack->parent, giststate,
1409 : : tuples, 2,
1410 : 11973 : stack->downlinkoffnum,
1411 : : left->buf, right->buf,
1412 : : true, /* Unlock parent */
1413 : : unlockbuf /* Unlock stack->buffer if caller
1414 : : * wants that */
1415 : : );
1416 : :
1417 : : /*
1418 : : * The downlink might have moved when we updated it. Even if the page
1419 : : * wasn't split, because gistinserttuples() implements updating the old
1420 : : * tuple by removing and re-inserting it!
1421 : : */
1422 : 11973 : stack->downlinkoffnum = InvalidOffsetNumber;
1423 : :
5561 1424 [ - + ]: 11973 : Assert(left->buf == stack->buffer);
1425 : :
1426 : : /*
1427 : : * If we split the page because we had to adjust the downlink on an
1428 : : * internal page, while descending the tree for inserting a new tuple,
1429 : : * then this might no longer be the correct page for the new tuple. The
1430 : : * downlink to this page might not cover the new tuple anymore, it might
1431 : : * need to go to the newly-created right sibling instead. Tell the caller
1432 : : * to walk back up the stack, to re-check at the parent which page to
1433 : : * insert to.
1434 : : *
1435 : : * Normally, the LSN-NSN interlock during the tree descend would also
1436 : : * detect that a concurrent split happened (by ourselves), and cause us to
1437 : : * retry at the parent. But that mechanism doesn't work during index
1438 : : * build, because we don't do WAL-logging, and don't update LSNs, during
1439 : : * index build.
1440 : : */
2497 1441 : 11973 : stack->retry_from_parent = true;
7573 teodor@sigaev.ru 1442 : 11973 : }
1443 : :
1444 : : /*
1445 : : * gistSplit -- split a page in the tree and fill struct
1446 : : * used for XLOG and real writes buffers. Function is recursive, ie
1447 : : * it will split page until keys will fit in every page.
1448 : : */
1449 : : SplitPageLayout *
10793 scrappy@hub.org 1450 : 13299 : gistSplit(Relation r,
1451 : : Page page,
1452 : : IndexTuple *itup, /* contains compressed entry */
1453 : : int len,
1454 : : GISTSTATE *giststate)
1455 : : {
1456 : : IndexTuple *lvectup,
1457 : : *rvectup;
1458 : : GistSplitVector v;
1459 : : int i;
803 rhaas@postgresql.org 1460 : 13299 : SplitPageLayout *res = NULL;
1461 : :
1462 : : /* this should never recurse very deeply, but better safe than sorry */
4181 heikki.linnakangas@i 1463 : 13299 : check_stack_depth();
1464 : :
1465 : : /* there's no point in splitting an empty page */
1466 [ - + ]: 13299 : Assert(len > 0);
1467 : :
1468 : : /*
1469 : : * If a single tuple doesn't fit on a page, no amount of splitting will
1470 : : * help.
1471 : : */
1472 [ - + ]: 13299 : if (len == 1)
4181 heikki.linnakangas@i 1473 [ # # ]:UBC 0 : ereport(ERROR,
1474 : : (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
1475 : : errmsg("index row size %zu exceeds maximum %zu for index \"%s\"",
1476 : : IndexTupleSize(itup[0]), GiSTPageSize,
1477 : : RelationGetRelationName(r))));
1478 : :
2562 akorotkov@postgresql 1479 :CBC 13299 : memset(v.spl_lisnull, true,
1480 : 13299 : sizeof(bool) * giststate->nonLeafTupdesc->natts);
1481 : 13299 : memset(v.spl_risnull, true,
1482 : 13299 : sizeof(bool) * giststate->nonLeafTupdesc->natts);
4781 tgl@sss.pgh.pa.us 1483 : 13299 : gistSplitByKey(r, page, itup, len, giststate, &v, 0);
1484 : :
1485 : : /* form left and right vector */
95 michael@paquier.xyz 1486 :GNC 13299 : lvectup = palloc_array(IndexTuple, len + 1);
1487 : 13299 : rvectup = palloc_array(IndexTuple, len + 1);
1488 : :
7200 teodor@sigaev.ru 1489 [ + + ]:CBC 572606 : for (i = 0; i < v.splitVector.spl_nleft; i++)
1490 : 559307 : lvectup[i] = itup[v.splitVector.spl_left[i] - 1];
1491 : :
1492 [ + + ]: 638814 : for (i = 0; i < v.splitVector.spl_nright; i++)
1493 : 625515 : rvectup[i] = itup[v.splitVector.spl_right[i] - 1];
1494 : :
1495 : : /* finalize splitting (may need another split) */
1496 [ + + ]: 13299 : if (!gistfitpage(rvectup, v.splitVector.spl_nright))
1497 : : {
1498 : 346 : res = gistSplit(r, page, rvectup, v.splitVector.spl_nright, giststate);
1499 : : }
1500 : : else
1501 : : {
7249 1502 : 12953 : ROTATEDIST(res);
7200 1503 : 12953 : res->block.num = v.splitVector.spl_nright;
7102 bruce@momjian.us 1504 : 12953 : res->list = gistfillitupvec(rvectup, v.splitVector.spl_nright, &(res->lenlist));
5561 heikki.linnakangas@i 1505 : 12953 : res->itup = gistFormTuple(giststate, r, v.spl_rattr, v.spl_risnull, false);
1506 : : }
1507 : :
7200 teodor@sigaev.ru 1508 [ + + ]: 13299 : if (!gistfitpage(lvectup, v.splitVector.spl_nleft))
1509 : : {
1510 : : SplitPageLayout *resptr,
1511 : : *subres;
1512 : :
1513 : 236 : resptr = subres = gistSplit(r, page, lvectup, v.splitVector.spl_nleft, giststate);
1514 : :
1515 : : /* install on list's tail */
7102 bruce@momjian.us 1516 [ + + ]: 593 : while (resptr->next)
7249 teodor@sigaev.ru 1517 : 357 : resptr = resptr->next;
1518 : :
1519 : 236 : resptr->next = res;
1520 : 236 : res = subres;
1521 : : }
1522 : : else
1523 : : {
1524 : 13063 : ROTATEDIST(res);
7200 1525 : 13063 : res->block.num = v.splitVector.spl_nleft;
7102 bruce@momjian.us 1526 : 13063 : res->list = gistfillitupvec(lvectup, v.splitVector.spl_nleft, &(res->lenlist));
5561 heikki.linnakangas@i 1527 : 13063 : res->itup = gistFormTuple(giststate, r, v.spl_lattr, v.spl_lisnull, false);
1528 : : }
1529 : :
7249 teodor@sigaev.ru 1530 : 13299 : return res;
1531 : : }
1532 : :
1533 : : /*
1534 : : * Create a GISTSTATE and fill it with information about the index
1535 : : */
1536 : : GISTSTATE *
5280 tgl@sss.pgh.pa.us 1537 : 4687 : initGISTstate(Relation index)
1538 : : {
1539 : : GISTSTATE *giststate;
1540 : : MemoryContext scanCxt;
1541 : : MemoryContext oldCxt;
1542 : : int i;
1543 : :
1544 : : /* safety check to protect fixed-size arrays in GISTSTATE */
8972 1545 [ - + ]: 4687 : if (index->rd_att->natts > INDEX_MAX_KEYS)
8273 tgl@sss.pgh.pa.us 1546 [ # # ]:UBC 0 : elog(ERROR, "numberOfAttributes %d > %d",
1547 : : index->rd_att->natts, INDEX_MAX_KEYS);
1548 : :
1549 : : /* Create the memory context that will hold the GISTSTATE */
5280 tgl@sss.pgh.pa.us 1550 :CBC 4687 : scanCxt = AllocSetContextCreate(CurrentMemoryContext,
1551 : : "GiST scan context",
1552 : : ALLOCSET_DEFAULT_SIZES);
1553 : 4687 : oldCxt = MemoryContextSwitchTo(scanCxt);
1554 : :
1555 : : /* Create and fill in the GISTSTATE */
95 michael@paquier.xyz 1556 :GNC 4687 : giststate = palloc_object(GISTSTATE);
1557 : :
5280 tgl@sss.pgh.pa.us 1558 :CBC 4687 : giststate->scanCxt = scanCxt;
3189 1559 : 4687 : giststate->tempCxt = scanCxt; /* caller must change this if needed */
2562 akorotkov@postgresql 1560 : 4687 : giststate->leafTupdesc = index->rd_att;
1561 : :
1562 : : /*
1563 : : * The truncated tupdesc for non-leaf index tuples, which doesn't contain
1564 : : * the INCLUDE attributes.
1565 : : *
1566 : : * It is used to form tuples during tuple adjustment and page split.
1567 : : * B-tree creates shortened tuple descriptor for every truncated tuple,
1568 : : * because it is doing this less often: it does not have to form truncated
1569 : : * tuples during page split. Also, B-tree is not adjusting tuples on
1570 : : * internal pages the way GiST does.
1571 : : */
450 drowley@postgresql.o 1572 : 9374 : giststate->nonLeafTupdesc = CreateTupleDescTruncatedCopy(index->rd_att,
1573 : 4687 : IndexRelationGetNumberOfKeyAttributes(index));
1574 : :
2562 akorotkov@postgresql 1575 [ + + ]: 12429 : for (i = 0; i < IndexRelationGetNumberOfKeyAttributes(index); i++)
1576 : : {
8926 tgl@sss.pgh.pa.us 1577 : 7742 : fmgr_info_copy(&(giststate->consistentFn[i]),
7607 neilc@samurai.com 1578 : 7742 : index_getprocinfo(index, i + 1, GIST_CONSISTENT_PROC),
1579 : : scanCxt);
8926 tgl@sss.pgh.pa.us 1580 : 7742 : fmgr_info_copy(&(giststate->unionFn[i]),
8907 bruce@momjian.us 1581 : 7742 : index_getprocinfo(index, i + 1, GIST_UNION_PROC),
1582 : : scanCxt);
1583 : :
1584 : : /* opclasses are not required to provide a Compress method */
3099 tgl@sss.pgh.pa.us 1585 [ + + ]: 7742 : if (OidIsValid(index_getprocid(index, i + 1, GIST_COMPRESS_PROC)))
1586 : 2696 : fmgr_info_copy(&(giststate->compressFn[i]),
1587 : 2696 : index_getprocinfo(index, i + 1, GIST_COMPRESS_PROC),
1588 : : scanCxt);
1589 : : else
1590 : 5046 : giststate->compressFn[i].fn_oid = InvalidOid;
1591 : :
1592 : : /* opclasses are not required to provide a Decompress method */
1593 [ + + ]: 7742 : if (OidIsValid(index_getprocid(index, i + 1, GIST_DECOMPRESS_PROC)))
1594 : 812 : fmgr_info_copy(&(giststate->decompressFn[i]),
1595 : 812 : index_getprocinfo(index, i + 1, GIST_DECOMPRESS_PROC),
1596 : : scanCxt);
1597 : : else
1598 : 6930 : giststate->decompressFn[i].fn_oid = InvalidOid;
1599 : :
8926 1600 : 7742 : fmgr_info_copy(&(giststate->penaltyFn[i]),
8907 bruce@momjian.us 1601 : 7742 : index_getprocinfo(index, i + 1, GIST_PENALTY_PROC),
1602 : : scanCxt);
8926 tgl@sss.pgh.pa.us 1603 : 7742 : fmgr_info_copy(&(giststate->picksplitFn[i]),
7607 neilc@samurai.com 1604 : 7742 : index_getprocinfo(index, i + 1, GIST_PICKSPLIT_PROC),
1605 : : scanCxt);
8926 tgl@sss.pgh.pa.us 1606 : 7742 : fmgr_info_copy(&(giststate->equalFn[i]),
8907 bruce@momjian.us 1607 : 7742 : index_getprocinfo(index, i + 1, GIST_EQUAL_PROC),
1608 : : scanCxt);
1609 : :
1610 : : /* opclasses are not required to provide a Distance method */
5581 tgl@sss.pgh.pa.us 1611 [ + + ]: 7742 : if (OidIsValid(index_getprocid(index, i + 1, GIST_DISTANCE_PROC)))
1612 : 958 : fmgr_info_copy(&(giststate->distanceFn[i]),
3189 1613 : 958 : index_getprocinfo(index, i + 1, GIST_DISTANCE_PROC),
1614 : : scanCxt);
1615 : : else
5581 1616 : 6784 : giststate->distanceFn[i].fn_oid = InvalidOid;
1617 : :
1618 : : /* opclasses are not required to provide a Fetch method */
4007 heikki.linnakangas@i 1619 [ + + ]: 7742 : if (OidIsValid(index_getprocid(index, i + 1, GIST_FETCH_PROC)))
1620 : 689 : fmgr_info_copy(&(giststate->fetchFn[i]),
3949 bruce@momjian.us 1621 : 689 : index_getprocinfo(index, i + 1, GIST_FETCH_PROC),
1622 : : scanCxt);
1623 : : else
4007 heikki.linnakangas@i 1624 : 7053 : giststate->fetchFn[i].fn_oid = InvalidOid;
1625 : :
1626 : : /*
1627 : : * If the index column has a specified collation, we should honor that
1628 : : * while doing comparisons. However, we may have a collatable storage
1629 : : * type for a noncollatable indexed data type. If there's no index
1630 : : * collation then specify default collation in case the support
1631 : : * functions need collation. This is harmless if the support
1632 : : * functions don't care about collation, so we just do it
1633 : : * unconditionally. (We could alternatively call get_typcollation,
1634 : : * but that seems like expensive overkill --- there aren't going to be
1635 : : * any cases where a GiST storage type has a nondefault collation.)
1636 : : */
5441 tgl@sss.pgh.pa.us 1637 [ + + ]: 7742 : if (OidIsValid(index->rd_indcollation[i]))
1638 : 98 : giststate->supportCollation[i] = index->rd_indcollation[i];
1639 : : else
1640 : 7644 : giststate->supportCollation[i] = DEFAULT_COLLATION_OID;
1641 : : }
1642 : :
1643 : : /* No opclass information for INCLUDE attributes */
2562 akorotkov@postgresql 1644 [ + + ]: 4938 : for (; i < index->rd_att->natts; i++)
1645 : : {
1646 : 251 : giststate->consistentFn[i].fn_oid = InvalidOid;
1647 : 251 : giststate->unionFn[i].fn_oid = InvalidOid;
1648 : 251 : giststate->compressFn[i].fn_oid = InvalidOid;
1649 : 251 : giststate->decompressFn[i].fn_oid = InvalidOid;
1650 : 251 : giststate->penaltyFn[i].fn_oid = InvalidOid;
1651 : 251 : giststate->picksplitFn[i].fn_oid = InvalidOid;
1652 : 251 : giststate->equalFn[i].fn_oid = InvalidOid;
1653 : 251 : giststate->distanceFn[i].fn_oid = InvalidOid;
1654 : 251 : giststate->fetchFn[i].fn_oid = InvalidOid;
1655 : 251 : giststate->supportCollation[i] = InvalidOid;
1656 : : }
1657 : :
5280 tgl@sss.pgh.pa.us 1658 : 4687 : MemoryContextSwitchTo(oldCxt);
1659 : :
1660 : 4687 : return giststate;
1661 : : }
1662 : :
1663 : : void
8907 bruce@momjian.us 1664 : 3583 : freeGISTstate(GISTSTATE *giststate)
1665 : : {
1666 : : /* It's sufficient to delete the scanCxt */
5280 tgl@sss.pgh.pa.us 1667 : 3583 : MemoryContextDelete(giststate->scanCxt);
10793 scrappy@hub.org 1668 : 3583 : }
1669 : :
1670 : : /*
1671 : : * gistprunepage() -- try to remove LP_DEAD items from the given page.
1672 : : * Function assumes that buffer is exclusively locked.
1673 : : */
1674 : : static void
2567 heikki.linnakangas@i 1675 :UBC 0 : gistprunepage(Relation rel, Page page, Buffer buffer, Relation heapRel)
1676 : : {
1677 : : OffsetNumber deletable[MaxIndexTuplesPerPage];
3566 rhaas@postgresql.org 1678 : 0 : int ndeletable = 0;
1679 : : OffsetNumber offnum,
1680 : : maxoff;
1681 : :
3840 teodor@sigaev.ru 1682 [ # # ]: 0 : Assert(GistPageIsLeaf(page));
1683 : :
1684 : : /*
1685 : : * Scan over all items to see which ones need to be deleted according to
1686 : : * LP_DEAD flags.
1687 : : */
1688 : 0 : maxoff = PageGetMaxOffsetNumber(page);
1689 : 0 : for (offnum = FirstOffsetNumber;
1690 [ # # ]: 0 : offnum <= maxoff;
1691 : 0 : offnum = OffsetNumberNext(offnum))
1692 : : {
1693 : 0 : ItemId itemId = PageGetItemId(page, offnum);
1694 : :
1695 [ # # ]: 0 : if (ItemIdIsDead(itemId))
1696 : 0 : deletable[ndeletable++] = offnum;
1697 : : }
1698 : :
1699 [ # # ]: 0 : if (ndeletable > 0)
1700 : : {
1214 pg@bowt.ie 1701 : 0 : TransactionId snapshotConflictHorizon = InvalidTransactionId;
1702 : :
1874 1703 [ # # # # : 0 : if (XLogStandbyInfoActive() && RelationNeedsWAL(rel))
# # # # #
# ]
1704 : : snapshotConflictHorizon =
1705 : 0 : index_compute_xid_horizon_for_tuples(rel, heapRel, buffer,
1706 : : deletable, ndeletable);
1707 : :
3840 teodor@sigaev.ru 1708 : 0 : START_CRIT_SECTION();
1709 : :
1710 : 0 : PageIndexMultiDelete(page, deletable, ndeletable);
1711 : :
1712 : : /*
1713 : : * Mark the page as not containing any LP_DEAD items. This is not
1714 : : * certainly true (there might be some that have recently been marked,
1715 : : * but weren't included in our target-item list), but it will almost
1716 : : * always be true and it doesn't seem worth an additional page scan to
1717 : : * check it. Remember that F_HAS_GARBAGE is only a hint anyway.
1718 : : */
1719 : 0 : GistClearPageHasGarbage(page);
1720 : :
1721 : 0 : MarkBufferDirty(buffer);
1722 : :
1723 : : /* XLOG stuff */
1724 [ # # # # : 0 : if (RelationNeedsWAL(rel))
# # # # ]
1725 : 0 : {
1726 : : XLogRecPtr recptr;
1727 : :
2641 akorotkov@postgresql 1728 : 0 : recptr = gistXLogDelete(buffer,
1729 : : deletable, ndeletable,
1730 : : snapshotConflictHorizon,
1731 : : heapRel);
1732 : :
3840 teodor@sigaev.ru 1733 : 0 : PageSetLSN(page, recptr);
1734 : : }
1735 : : else
2 pg@bowt.ie 1736 :UNC 0 : PageSetLSN(page, XLogGetFakeLSN(rel));
1737 : :
3840 teodor@sigaev.ru 1738 [ # # ]:UBC 0 : END_CRIT_SECTION();
1739 : : }
1740 : :
1741 : : /*
1742 : : * Note: if we didn't find any LP_DEAD items, then the page's
1743 : : * F_HAS_GARBAGE hint bit is falsely set. We do not bother expending a
1744 : : * separate write to clear it, however. We will clear it when we split
1745 : : * the page.
1746 : : */
1747 : 0 : }
|