Age Owner Branch data TLA Line data Source code
1 : : /*-------------------------------------------------------------------------
2 : : *
3 : : * nbtsplitloc.c
4 : : * Choose split point code for Postgres btree implementation.
5 : : *
6 : : * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
7 : : * Portions Copyright (c) 1994, Regents of the University of California
8 : : *
9 : : *
10 : : * IDENTIFICATION
11 : : * src/backend/access/nbtree/nbtsplitloc.c
12 : : *
13 : : *-------------------------------------------------------------------------
14 : : */
15 : : #include "postgres.h"
16 : :
17 : : #include "access/nbtree.h"
18 : : #include "access/tableam.h"
19 : : #include "common/int.h"
20 : :
21 : : typedef enum
22 : : {
23 : : /* strategy for searching through materialized list of split points */
24 : : SPLIT_DEFAULT, /* give some weight to truncation */
25 : : SPLIT_MANY_DUPLICATES, /* find minimally distinguishing point */
26 : : SPLIT_SINGLE_VALUE, /* leave left page almost full */
27 : : } FindSplitStrat;
28 : :
29 : : typedef struct
30 : : {
31 : : /* details of free space left by split */
32 : : int16 curdelta; /* current leftfree/rightfree delta */
33 : : int16 leftfree; /* space left on left page post-split */
34 : : int16 rightfree; /* space left on right page post-split */
35 : :
36 : : /* split point identifying fields (returned by _bt_findsplitloc) */
37 : : OffsetNumber firstrightoff; /* first origpage item on rightpage */
38 : : bool newitemonleft; /* new item goes on left, or right? */
39 : : } SplitPoint;
40 : :
41 : : typedef struct
42 : : {
43 : : /* context data for _bt_recsplitloc */
44 : : Relation rel; /* index relation */
45 : : Page origpage; /* page undergoing split */
46 : : IndexTuple newitem; /* new item (cause of page split) */
47 : : Size newitemsz; /* size of newitem (includes line pointer) */
48 : : bool is_leaf; /* T if splitting a leaf page */
49 : : bool is_rightmost; /* T if splitting rightmost page on level */
50 : : OffsetNumber newitemoff; /* where the new item is to be inserted */
51 : : int leftspace; /* space available for items on left page */
52 : : int rightspace; /* space available for items on right page */
53 : : int olddataitemstotal; /* space taken by old items */
54 : : Size minfirstrightsz; /* smallest firstright size */
55 : :
56 : : /* candidate split point data */
57 : : int maxsplits; /* maximum number of splits */
58 : : int nsplits; /* current number of splits */
59 : : SplitPoint *splits; /* all candidate split points for page */
60 : : int interval; /* current range of acceptable split points */
61 : : } FindSplitData;
62 : :
63 : : static void _bt_recsplitloc(FindSplitData *state,
64 : : OffsetNumber firstrightoff, bool newitemonleft,
65 : : int olddataitemstoleft,
66 : : Size firstrightofforigpagetuplesz);
67 : : static void _bt_deltasortsplits(FindSplitData *state, double fillfactormult,
68 : : bool usemult);
69 : : static int _bt_splitcmp(const void *arg1, const void *arg2);
70 : : static bool _bt_afternewitemoff(FindSplitData *state, OffsetNumber maxoff,
71 : : int leaffillfactor, bool *usemult);
72 : : static bool _bt_adjacenthtid(ItemPointer lowhtid, ItemPointer highhtid);
73 : : static OffsetNumber _bt_bestsplitloc(FindSplitData *state, int perfectpenalty,
74 : : bool *newitemonleft, FindSplitStrat strategy);
75 : : static int _bt_defaultinterval(FindSplitData *state);
76 : : static int _bt_strategy(FindSplitData *state, SplitPoint *leftpage,
77 : : SplitPoint *rightpage, FindSplitStrat *strategy);
78 : : static void _bt_interval_edges(FindSplitData *state,
79 : : SplitPoint **leftinterval, SplitPoint **rightinterval);
80 : : static inline int _bt_split_penalty(FindSplitData *state, SplitPoint *split);
81 : : static inline IndexTuple _bt_split_lastleft(FindSplitData *state,
82 : : SplitPoint *split);
83 : : static inline IndexTuple _bt_split_firstright(FindSplitData *state,
84 : : SplitPoint *split);
85 : :
86 : :
87 : : /*
88 : : * _bt_findsplitloc() -- find an appropriate place to split a page.
89 : : *
90 : : * The main goal here is to equalize the free space that will be on each
91 : : * split page, *after accounting for the inserted tuple*. (If we fail to
92 : : * account for it, we might find ourselves with too little room on the page
93 : : * that it needs to go into!)
94 : : *
95 : : * If the page is the rightmost page on its level, we instead try to arrange
96 : : * to leave the left split page fillfactor% full. In this way, when we are
97 : : * inserting successively increasing keys (consider sequences, timestamps,
98 : : * etc) we will end up with a tree whose pages are about fillfactor% full,
99 : : * instead of the 50% full result that we'd get without this special case.
100 : : * This is the same as nbtsort.c produces for a newly-created tree. Note
101 : : * that leaf and nonleaf pages use different fillfactors. Note also that
102 : : * there are a number of further special cases where fillfactor is not
103 : : * applied in the standard way.
104 : : *
105 : : * We are passed the intended insert position of the new tuple, expressed as
106 : : * the offsetnumber of the tuple it must go in front of (this could be
107 : : * maxoff+1 if the tuple is to go at the end). The new tuple itself is also
108 : : * passed, since it's needed to give some weight to how effective suffix
109 : : * truncation will be. The implementation picks the split point that
110 : : * maximizes the effectiveness of suffix truncation from a small list of
111 : : * alternative candidate split points that leave each side of the split with
112 : : * about the same share of free space. Suffix truncation is secondary to
113 : : * equalizing free space, except in cases with large numbers of duplicates.
114 : : * Note that it is always assumed that caller goes on to perform truncation,
115 : : * even with pg_upgrade'd indexes where that isn't actually the case
116 : : * (!heapkeyspace indexes). See nbtree/README for more information about
117 : : * suffix truncation.
118 : : *
119 : : * We return the index of the first existing tuple that should go on the
120 : : * righthand page (which is called firstrightoff), plus a boolean
121 : : * indicating whether the new tuple goes on the left or right page. You
122 : : * can think of the returned state as a point _between_ two adjacent data
123 : : * items (lastleft and firstright data items) on an imaginary version of
124 : : * origpage that already includes newitem. The bool is necessary to
125 : : * disambiguate the case where firstrightoff == newitemoff (i.e. it is
126 : : * sometimes needed to determine if the firstright tuple for the split is
127 : : * newitem rather than the tuple from origpage at offset firstrightoff).
128 : : */
129 : : OffsetNumber
2362 pg@bowt.ie 130 :CBC 11436 : _bt_findsplitloc(Relation rel,
131 : : Page origpage,
132 : : OffsetNumber newitemoff,
133 : : Size newitemsz,
134 : : IndexTuple newitem,
135 : : bool *newitemonleft)
136 : : {
137 : : BTPageOpaque opaque;
138 : : int leftspace,
139 : : rightspace,
140 : : olddataitemstotal,
141 : : olddataitemstoleft,
142 : : perfectpenalty,
143 : : leaffillfactor;
144 : : FindSplitData state;
145 : : FindSplitStrat strategy;
146 : : ItemId itemid;
147 : : OffsetNumber offnum,
148 : : maxoff,
149 : : firstrightoff;
150 : : double fillfactormult;
151 : : bool usemult;
152 : : SplitPoint leftpage,
153 : : rightpage;
154 : :
1254 michael@paquier.xyz 155 : 11436 : opaque = BTPageGetOpaque(origpage);
1972 pg@bowt.ie 156 : 11436 : maxoff = PageGetMaxOffsetNumber(origpage);
157 : :
158 : : /* Total free space available on a btree page, after fixed overhead */
2362 159 : 11436 : leftspace = rightspace =
1972 160 : 11436 : PageGetPageSize(origpage) - SizeOfPageHeaderData -
161 : : MAXALIGN(sizeof(BTPageOpaqueData));
162 : :
163 : : /* The right page will have the same high key as the old page */
2362 164 [ + + ]: 11436 : if (!P_RIGHTMOST(opaque))
165 : : {
1972 166 : 5044 : itemid = PageGetItemId(origpage, P_HIKEY);
2362 167 : 5044 : rightspace -= (int) (MAXALIGN(ItemIdGetLength(itemid)) +
168 : : sizeof(ItemIdData));
169 : : }
170 : :
171 : : /* Count up total space in data items before actually scanning 'em */
1972 172 : 11436 : olddataitemstotal = rightspace - (int) PageGetExactFreeSpace(origpage);
2112 michael@paquier.xyz 173 [ + - - + : 11436 : leaffillfactor = BTGetFillFactor(rel);
+ + ]
174 : :
175 : : /* Passed-in newitemsz is MAXALIGNED but does not include line pointer */
2362 pg@bowt.ie 176 : 11436 : newitemsz += sizeof(ItemIdData);
177 : 11436 : state.rel = rel;
1972 178 : 11436 : state.origpage = origpage;
2362 179 : 11436 : state.newitem = newitem;
180 : 11436 : state.newitemsz = newitemsz;
181 : 11436 : state.is_leaf = P_ISLEAF(opaque);
182 : 11436 : state.is_rightmost = P_RIGHTMOST(opaque);
183 : 11436 : state.leftspace = leftspace;
184 : 11436 : state.rightspace = rightspace;
185 : 11436 : state.olddataitemstotal = olddataitemstotal;
186 : 11436 : state.minfirstrightsz = SIZE_MAX;
187 : 11436 : state.newitemoff = newitemoff;
188 : :
189 : : /* newitem cannot be a posting list item */
2019 190 [ - + ]: 11436 : Assert(!BTreeTupleIsPosting(newitem));
191 : :
192 : : /*
193 : : * nsplits should never exceed maxoff because there will be at most as
194 : : * many candidate split points as there are points _between_ tuples, once
195 : : * you imagine that the new item is already on the original page (the
196 : : * final number of splits may be slightly lower because not all points
197 : : * between tuples will be legal).
198 : : */
2362 199 : 11436 : state.maxsplits = maxoff;
200 : 11436 : state.splits = palloc(sizeof(SplitPoint) * state.maxsplits);
201 : 11436 : state.nsplits = 0;
202 : :
203 : : /*
204 : : * Scan through the data items and calculate space usage for a split at
205 : : * each possible position
206 : : */
207 : 11436 : olddataitemstoleft = 0;
208 : :
209 [ + + ]: 11436 : for (offnum = P_FIRSTDATAKEY(opaque);
210 [ + + ]: 3471510 : offnum <= maxoff;
211 : 3460074 : offnum = OffsetNumberNext(offnum))
212 : : {
213 : : Size itemsz;
214 : :
1972 215 : 3460074 : itemid = PageGetItemId(origpage, offnum);
2362 216 : 3460074 : itemsz = MAXALIGN(ItemIdGetLength(itemid)) + sizeof(ItemIdData);
217 : :
218 : : /*
219 : : * When item offset number is not newitemoff, neither side of the
220 : : * split can be newitem. Record a split after the previous data item
221 : : * from original page, but before the current data item from original
222 : : * page. (_bt_recsplitloc() will reject the split when there are no
223 : : * previous items, which we rely on.)
224 : : */
2306 225 [ + + ]: 3460074 : if (offnum < newitemoff)
2362 226 : 3018998 : _bt_recsplitloc(&state, offnum, false, olddataitemstoleft, itemsz);
2306 227 [ + + ]: 441076 : else if (offnum > newitemoff)
228 : 434544 : _bt_recsplitloc(&state, offnum, true, olddataitemstoleft, itemsz);
229 : : else
230 : : {
231 : : /*
232 : : * Record a split after all "offnum < newitemoff" original page
233 : : * data items, but before newitem
234 : : */
2362 235 : 6532 : _bt_recsplitloc(&state, offnum, false, olddataitemstoleft, itemsz);
236 : :
237 : : /*
238 : : * Record a split after newitem, but before data item from
239 : : * original page at offset newitemoff/current offset
240 : : */
2306 241 : 6532 : _bt_recsplitloc(&state, offnum, true, olddataitemstoleft, itemsz);
242 : : }
243 : :
2362 244 : 3460074 : olddataitemstoleft += itemsz;
245 : : }
246 : :
247 : : /*
248 : : * Record a split after all original page data items, but before newitem.
249 : : * (Though only when it's possible that newitem will end up alone on new
250 : : * right page.)
251 : : */
252 [ - + ]: 11436 : Assert(olddataitemstoleft == olddataitemstotal);
253 [ + + ]: 11436 : if (newitemoff > maxoff)
254 : 4904 : _bt_recsplitloc(&state, newitemoff, false, olddataitemstotal, 0);
255 : :
256 : : /*
257 : : * I believe it is not possible to fail to find a feasible split, but just
258 : : * in case ...
259 : : */
260 [ - + ]: 11436 : if (state.nsplits == 0)
2362 pg@bowt.ie 261 [ # # ]:UBC 0 : elog(ERROR, "could not find a feasible split point for index \"%s\"",
262 : : RelationGetRelationName(rel));
263 : :
264 : : /*
265 : : * Start search for a split point among list of legal split points. Give
266 : : * primary consideration to equalizing available free space in each half
267 : : * of the split initially (start with default strategy), while applying
268 : : * rightmost and split-after-new-item optimizations where appropriate.
269 : : * Either of the two other fallback strategies may be required for cases
270 : : * with a large number of duplicates around the original/space-optimal
271 : : * split point.
272 : : *
273 : : * Default strategy gives some weight to suffix truncation in deciding a
274 : : * split point on leaf pages. It attempts to select a split point where a
275 : : * distinguishing attribute appears earlier in the new high key for the
276 : : * left side of the split, in order to maximize the number of trailing
277 : : * attributes that can be truncated away. Only candidate split points
278 : : * that imply an acceptable balance of free space on each side are
279 : : * considered. See _bt_defaultinterval().
280 : : */
2362 pg@bowt.ie 281 [ + + ]:CBC 11436 : if (!state.is_leaf)
282 : : {
283 : : /* fillfactormult only used on rightmost page */
284 : 157 : usemult = state.is_rightmost;
285 : 157 : fillfactormult = BTREE_NONLEAF_FILLFACTOR / 100.0;
286 : : }
287 [ + + ]: 11279 : else if (state.is_rightmost)
288 : : {
289 : : /* Rightmost leaf page -- fillfactormult always used */
290 : 6310 : usemult = true;
291 : 6310 : fillfactormult = leaffillfactor / 100.0;
292 : : }
2357 293 [ + + ]: 4969 : else if (_bt_afternewitemoff(&state, maxoff, leaffillfactor, &usemult))
294 : : {
295 : : /*
296 : : * New item inserted at rightmost point among a localized grouping on
297 : : * a leaf page -- apply "split after new item" optimization, either by
298 : : * applying leaf fillfactor multiplier, or by choosing the exact split
299 : : * point that leaves newitem as lastleft. (usemult is set for us.)
300 : : */
301 [ + + ]: 315 : if (usemult)
302 : : {
303 : : /* fillfactormult should be set based on leaf fillfactor */
304 : 227 : fillfactormult = leaffillfactor / 100.0;
305 : : }
306 : : else
307 : : {
308 : : /* find precise split point after newitemoff */
309 [ + - ]: 23808 : for (int i = 0; i < state.nsplits; i++)
310 : : {
311 : 23808 : SplitPoint *split = state.splits + i;
312 : :
313 [ + + ]: 23808 : if (split->newitemonleft &&
1972 314 [ + - ]: 88 : newitemoff == split->firstrightoff)
315 : : {
2357 316 : 88 : pfree(state.splits);
317 : 88 : *newitemonleft = true;
318 : 88 : return newitemoff;
319 : : }
320 : : }
321 : :
322 : : /*
323 : : * Cannot legally split after newitemoff; proceed with split
324 : : * without using fillfactor multiplier. This is defensive, and
325 : : * should never be needed in practice.
326 : : */
2357 pg@bowt.ie 327 :UBC 0 : fillfactormult = 0.50;
328 : : }
329 : : }
330 : : else
331 : : {
332 : : /* Other leaf page. 50:50 page split. */
2362 pg@bowt.ie 333 :CBC 4654 : usemult = false;
334 : : /* fillfactormult not used, but be tidy */
335 : 4654 : fillfactormult = 0.50;
336 : : }
337 : :
338 : : /*
339 : : * Save leftmost and rightmost splits for page before original ordinal
340 : : * sort order is lost by delta/fillfactormult sort
341 : : */
342 : 11348 : leftpage = state.splits[0];
343 : 11348 : rightpage = state.splits[state.nsplits - 1];
344 : :
345 : : /* Give split points a fillfactormult-wise delta, and sort on deltas */
346 : 11348 : _bt_deltasortsplits(&state, fillfactormult, usemult);
347 : :
348 : : /* Determine split interval for default strategy */
1964 349 : 11348 : state.interval = _bt_defaultinterval(&state);
350 : :
351 : : /*
352 : : * Determine if default strategy/split interval will produce a
353 : : * sufficiently distinguishing split, or if we should change strategies.
354 : : * Alternative strategies change the range of split points that are
355 : : * considered acceptable (split interval), and possibly change
356 : : * fillfactormult, in order to deal with pages with a large number of
357 : : * duplicates gracefully.
358 : : *
359 : : * Pass low and high splits for the entire page (actually, they're for an
360 : : * imaginary version of the page that includes newitem). These are used
361 : : * when the initial split interval encloses split points that are full of
362 : : * duplicates, and we need to consider if it's even possible to avoid
363 : : * appending a heap TID.
364 : : */
2362 365 : 11348 : perfectpenalty = _bt_strategy(&state, &leftpage, &rightpage, &strategy);
366 : :
367 [ + + ]: 11348 : if (strategy == SPLIT_DEFAULT)
368 : : {
369 : : /*
370 : : * Default strategy worked out (always works out with internal page).
371 : : * Original split interval still stands.
372 : : */
373 : : }
374 : :
375 : : /*
376 : : * Many duplicates strategy is used when a heap TID would otherwise be
377 : : * appended, but the page isn't completely full of logical duplicates.
378 : : *
379 : : * The split interval is widened to include all legal candidate split
380 : : * points. There might be a few as two distinct values in the whole-page
381 : : * split interval, though it's also possible that most of the values on
382 : : * the page are unique. The final split point will either be to the
383 : : * immediate left or to the immediate right of the group of duplicate
384 : : * tuples that enclose the first/delta-optimal split point (perfect
385 : : * penalty was set so that the lowest delta split point that avoids
386 : : * appending a heap TID will be chosen). Maximizing the number of
387 : : * attributes that can be truncated away is not a goal of the many
388 : : * duplicates strategy.
389 : : *
390 : : * Single value strategy is used when it is impossible to avoid appending
391 : : * a heap TID. It arranges to leave the left page very full. This
392 : : * maximizes space utilization in cases where tuples with the same
393 : : * attribute values span many pages. Newly inserted duplicates will tend
394 : : * to have higher heap TID values, so we'll end up splitting to the right
395 : : * consistently. (Single value strategy is harmless though not
396 : : * particularly useful with !heapkeyspace indexes.)
397 : : */
398 [ + + ]: 198 : else if (strategy == SPLIT_MANY_DUPLICATES)
399 : : {
400 [ - + ]: 71 : Assert(state.is_leaf);
401 : : /* Shouldn't try to truncate away extra user attributes */
2245 402 [ - + ]: 71 : Assert(perfectpenalty ==
403 : : IndexRelationGetNumberOfKeyAttributes(state.rel));
404 : : /* No need to resort splits -- no change in fillfactormult/deltas */
2362 405 : 71 : state.interval = state.nsplits;
406 : : }
407 [ + - ]: 127 : else if (strategy == SPLIT_SINGLE_VALUE)
408 : : {
409 [ - + ]: 127 : Assert(state.is_leaf);
410 : : /* Split near the end of the page */
411 : 127 : usemult = true;
412 : 127 : fillfactormult = BTREE_SINGLEVAL_FILLFACTOR / 100.0;
413 : : /* Resort split points with new delta */
414 : 127 : _bt_deltasortsplits(&state, fillfactormult, usemult);
415 : : /* Appending a heap TID is unavoidable, so interval of 1 is fine */
416 : 127 : state.interval = 1;
417 : : }
418 : :
419 : : /*
420 : : * Search among acceptable split points (using final split interval) for
421 : : * the entry that has the lowest penalty, and is therefore expected to
422 : : * maximize fan-out. Sets *newitemonleft for us.
423 : : */
1972 424 : 11348 : firstrightoff = _bt_bestsplitloc(&state, perfectpenalty, newitemonleft,
425 : : strategy);
2362 426 : 11348 : pfree(state.splits);
427 : :
1972 428 : 11348 : return firstrightoff;
429 : : }
430 : :
431 : : /*
432 : : * Subroutine to record a particular point between two tuples (possibly the
433 : : * new item) on page (ie, combination of firstrightoff and newitemonleft
434 : : * settings) in *state for later analysis. This is also a convenient point to
435 : : * check if the split is legal (if it isn't, it won't be recorded).
436 : : *
437 : : * firstrightoff is the offset of the first item on the original page that
438 : : * goes to the right page, and firstrightofforigpagetuplesz is the size of
439 : : * that tuple. firstrightoff can be > max offset, which means that all the
440 : : * old items go to the left page and only the new item goes to the right page.
441 : : * We don't actually use firstrightofforigpagetuplesz in that case (actually,
442 : : * we don't use it for _any_ split where the firstright tuple happens to be
443 : : * newitem).
444 : : *
445 : : * olddataitemstoleft is the total size of all old items to the left of the
446 : : * split point that is recorded here when legal. Should not include
447 : : * newitemsz, since that is handled here.
448 : : */
449 : : static void
2362 450 : 3471510 : _bt_recsplitloc(FindSplitData *state,
451 : : OffsetNumber firstrightoff,
452 : : bool newitemonleft,
453 : : int olddataitemstoleft,
454 : : Size firstrightofforigpagetuplesz)
455 : : {
456 : : int16 leftfree,
457 : : rightfree;
458 : : Size firstrightsz;
2019 459 : 3471510 : Size postingsz = 0;
460 : : bool newitemisfirstright;
461 : :
462 : : /* Is the new item going to be split point's firstright tuple? */
1972 463 [ + + ]: 3489478 : newitemisfirstright = (firstrightoff == state->newitemoff &&
464 [ + + ]: 17968 : !newitemonleft);
465 : :
466 [ + + ]: 3471510 : if (newitemisfirstright)
467 : 11436 : firstrightsz = state->newitemsz;
468 : : else
469 : : {
470 : 3460074 : firstrightsz = firstrightofforigpagetuplesz;
471 : :
472 : : /*
473 : : * Calculate suffix truncation space saving when firstright tuple is a
474 : : * posting list tuple, though only when the tuple is over 64 bytes
475 : : * including line pointer overhead (arbitrary). This avoids accessing
476 : : * the tuple in cases where its posting list must be very small (if
477 : : * tuple has one at all).
478 : : *
479 : : * Note: We don't do this in the case where firstright tuple is
480 : : * newitem, since newitem cannot have a posting list.
481 : : */
482 [ + + + + ]: 3460074 : if (state->is_leaf && firstrightsz > 64)
483 : : {
484 : : ItemId itemid;
485 : : IndexTuple newhighkey;
486 : :
487 : 25688 : itemid = PageGetItemId(state->origpage, firstrightoff);
488 : 25688 : newhighkey = (IndexTuple) PageGetItem(state->origpage, itemid);
489 : :
2019 490 [ + + ]: 25688 : if (BTreeTupleIsPosting(newhighkey))
491 : 13318 : postingsz = IndexTupleSize(newhighkey) -
492 : 13318 : BTreeTupleGetPostingOffset(newhighkey);
493 : : }
494 : : }
495 : :
496 : : /* Account for all the old tuples */
2362 497 : 3471510 : leftfree = state->leftspace - olddataitemstoleft;
498 : 3471510 : rightfree = state->rightspace -
499 : 3471510 : (state->olddataitemstotal - olddataitemstoleft);
500 : :
501 : : /*
502 : : * The first item on the right page becomes the high key of the left page;
503 : : * therefore it counts against left space as well as right space (we
504 : : * cannot assume that suffix truncation will make it any smaller). When
505 : : * index has included attributes, then those attributes of left page high
506 : : * key will be truncated leaving that page with slightly more free space.
507 : : * However, that shouldn't affect our ability to find valid split
508 : : * location, since we err in the direction of being pessimistic about free
509 : : * space on the left half. Besides, even when suffix truncation of
510 : : * non-TID attributes occurs, the new high key often won't even be a
511 : : * single MAXALIGN() quantum smaller than the firstright tuple it's based
512 : : * on.
513 : : *
514 : : * If we are on the leaf level, assume that suffix truncation cannot avoid
515 : : * adding a heap TID to the left half's new high key when splitting at the
516 : : * leaf level. In practice the new high key will often be smaller and
517 : : * will rarely be larger, but conservatively assume the worst case. We do
518 : : * go to the trouble of subtracting away posting list overhead, though
519 : : * only when it looks like it will make an appreciable difference.
520 : : * (Posting lists are the only case where truncation will typically make
521 : : * the final high key far smaller than firstright, so being a bit more
522 : : * precise there noticeably improves the balance of free space.)
523 : : */
524 [ + + ]: 3471510 : if (state->is_leaf)
1972 525 : 3469441 : leftfree -= (int16) (firstrightsz +
2019 526 : 3469441 : MAXALIGN(sizeof(ItemPointerData)) -
527 : : postingsz);
528 : : else
1972 529 : 2069 : leftfree -= (int16) firstrightsz;
530 : :
531 : : /* account for the new item */
2362 532 [ + + ]: 3471510 : if (newitemonleft)
533 : 441076 : leftfree -= (int16) state->newitemsz;
534 : : else
535 : 3030434 : rightfree -= (int16) state->newitemsz;
536 : :
537 : : /*
538 : : * If we are not on the leaf level, we will be able to discard the key
539 : : * data from the first item that winds up on the right page.
540 : : */
541 [ + + ]: 3471510 : if (!state->is_leaf)
1972 542 : 2069 : rightfree += (int16) firstrightsz -
543 : : (int16) (MAXALIGN(sizeof(IndexTupleData)) + sizeof(ItemIdData));
544 : :
545 : : /* Record split if legal */
2362 546 [ + + + + ]: 3471510 : if (leftfree >= 0 && rightfree >= 0)
547 : : {
548 [ - + ]: 3450221 : Assert(state->nsplits < state->maxsplits);
549 : :
550 : : /* Determine smallest firstright tuple size among legal splits */
1972 551 : 3450221 : state->minfirstrightsz = Min(state->minfirstrightsz, firstrightsz);
552 : :
2362 553 : 3450221 : state->splits[state->nsplits].curdelta = 0;
554 : 3450221 : state->splits[state->nsplits].leftfree = leftfree;
555 : 3450221 : state->splits[state->nsplits].rightfree = rightfree;
1972 556 : 3450221 : state->splits[state->nsplits].firstrightoff = firstrightoff;
2362 557 : 3450221 : state->splits[state->nsplits].newitemonleft = newitemonleft;
558 : 3450221 : state->nsplits++;
559 : : }
560 : 3471510 : }
561 : :
562 : : /*
563 : : * Subroutine to assign space deltas to materialized array of candidate split
564 : : * points based on current fillfactor, and to sort array using that fillfactor
565 : : */
566 : : static void
567 : 11475 : _bt_deltasortsplits(FindSplitData *state, double fillfactormult,
568 : : bool usemult)
569 : : {
570 [ + + ]: 3430945 : for (int i = 0; i < state->nsplits; i++)
571 : : {
572 : 3419470 : SplitPoint *split = state->splits + i;
573 : : int16 delta;
574 : :
575 [ + + ]: 3419470 : if (usemult)
576 : 2219362 : delta = fillfactormult * split->leftfree -
577 : 2219362 : (1.0 - fillfactormult) * split->rightfree;
578 : : else
579 : 1200108 : delta = split->leftfree - split->rightfree;
580 : :
581 [ + + ]: 3419470 : if (delta < 0)
582 : 822216 : delta = -delta;
583 : :
584 : : /* Save delta */
585 : 3419470 : split->curdelta = delta;
586 : : }
587 : :
588 : 11475 : qsort(state->splits, state->nsplits, sizeof(SplitPoint), _bt_splitcmp);
589 : 11475 : }
590 : :
591 : : /*
592 : : * qsort-style comparator used by _bt_deltasortsplits()
593 : : */
594 : : static int
595 : 36828786 : _bt_splitcmp(const void *arg1, const void *arg2)
596 : : {
597 : 36828786 : SplitPoint *split1 = (SplitPoint *) arg1;
598 : 36828786 : SplitPoint *split2 = (SplitPoint *) arg2;
599 : :
568 nathan@postgresql.or 600 : 36828786 : return pg_cmp_s16(split1->curdelta, split2->curdelta);
601 : : }
602 : :
603 : : /*
604 : : * Subroutine to determine whether or not a non-rightmost leaf page should be
605 : : * split immediately after the would-be original page offset for the
606 : : * new/incoming tuple (or should have leaf fillfactor applied when new item is
607 : : * to the right on original page). This is appropriate when there is a
608 : : * pattern of localized monotonically increasing insertions into a composite
609 : : * index, where leading attribute values form local groupings, and we
610 : : * anticipate further insertions of the same/current grouping (new item's
611 : : * grouping) in the near future. This can be thought of as a variation on
612 : : * applying leaf fillfactor during rightmost leaf page splits, since cases
613 : : * that benefit will converge on packing leaf pages leaffillfactor% full over
614 : : * time.
615 : : *
616 : : * We may leave extra free space remaining on the rightmost page of a "most
617 : : * significant column" grouping of tuples if that grouping never ends up
618 : : * having future insertions that use the free space. That effect is
619 : : * self-limiting; a future grouping that becomes the "nearest on the right"
620 : : * grouping of the affected grouping usually puts the extra free space to good
621 : : * use.
622 : : *
623 : : * Caller uses optimization when routine returns true, though the exact action
624 : : * taken by caller varies. Caller uses original leaf page fillfactor in
625 : : * standard way rather than using the new item offset directly when *usemult
626 : : * was also set to true here. Otherwise, caller applies optimization by
627 : : * locating the legal split point that makes the new tuple the lastleft tuple
628 : : * for the split.
629 : : */
630 : : static bool
2357 pg@bowt.ie 631 : 4969 : _bt_afternewitemoff(FindSplitData *state, OffsetNumber maxoff,
632 : : int leaffillfactor, bool *usemult)
633 : : {
634 : : int16 nkeyatts;
635 : : ItemId itemid;
636 : : IndexTuple tup;
637 : : int keepnatts;
638 : :
639 [ + - - + ]: 4969 : Assert(state->is_leaf && !state->is_rightmost);
640 : :
641 : 4969 : nkeyatts = IndexRelationGetNumberOfKeyAttributes(state->rel);
642 : :
643 : : /* Single key indexes not considered here */
644 [ + + ]: 4969 : if (nkeyatts == 1)
645 : 566 : return false;
646 : :
647 : : /* Ascending insertion pattern never inferred when new item is first */
648 [ + + ]: 4403 : if (state->newitemoff == P_FIRSTKEY)
649 : 1 : return false;
650 : :
651 : : /*
652 : : * Only apply optimization on pages with equisized tuples, since ordinal
653 : : * keys are likely to be fixed-width. Testing if the new tuple is
654 : : * variable width directly might also work, but that fails to apply the
655 : : * optimization to indexes with a numeric_ops attribute.
656 : : *
657 : : * Conclude that page has equisized tuples when the new item is the same
658 : : * width as the smallest item observed during pass over page, and other
659 : : * non-pivot tuples must be the same width as well. (Note that the
660 : : * possibly-truncated existing high key isn't counted in
661 : : * olddataitemstotal, and must be subtracted from maxoff.)
662 : : */
663 [ + + ]: 4402 : if (state->newitemsz != state->minfirstrightsz)
664 : 1502 : return false;
665 [ + + ]: 2900 : if (state->newitemsz * (maxoff - 1) != state->olddataitemstotal)
666 : 2209 : return false;
667 : :
668 : : /*
669 : : * Avoid applying optimization when tuples are wider than a tuple
670 : : * consisting of two non-NULL int8/int64 attributes (or four non-NULL
671 : : * int4/int32 attributes)
672 : : */
673 [ - + ]: 691 : if (state->newitemsz >
674 : : MAXALIGN(sizeof(IndexTupleData) + sizeof(int64) * 2) +
675 : : sizeof(ItemIdData))
2357 pg@bowt.ie 676 :UBC 0 : return false;
677 : :
678 : : /*
679 : : * At least the first attribute's value must be equal to the corresponding
680 : : * value in previous tuple to apply optimization. New item cannot be a
681 : : * duplicate, either.
682 : : *
683 : : * Handle case where new item is to the right of all items on the existing
684 : : * page. This is suggestive of monotonically increasing insertions in
685 : : * itself, so the "heap TID adjacency" test is not applied here.
686 : : */
2357 pg@bowt.ie 687 [ + + ]:CBC 691 : if (state->newitemoff > maxoff)
688 : : {
1972 689 : 208 : itemid = PageGetItemId(state->origpage, maxoff);
690 : 208 : tup = (IndexTuple) PageGetItem(state->origpage, itemid);
2357 691 : 208 : keepnatts = _bt_keep_natts_fast(state->rel, tup, state->newitem);
692 : :
693 [ + - + - ]: 208 : if (keepnatts > 1 && keepnatts <= nkeyatts)
694 : : {
695 : 208 : *usemult = true;
696 : 208 : return true;
697 : : }
698 : :
2357 pg@bowt.ie 699 :LBC (2) : return false;
700 : : }
701 : :
702 : : /*
703 : : * "Low cardinality leading column, high cardinality suffix column"
704 : : * indexes with a random insertion pattern (e.g., an index with a boolean
705 : : * column, such as an index on '(book_is_in_print, book_isbn)') present us
706 : : * with a risk of consistently misapplying the optimization. We're
707 : : * willing to accept very occasional misapplication of the optimization,
708 : : * provided the cases where we get it wrong are rare and self-limiting.
709 : : *
710 : : * Heap TID adjacency strongly suggests that the item just to the left was
711 : : * inserted very recently, which limits overapplication of the
712 : : * optimization. Besides, all inappropriate cases triggered here will
713 : : * still split in the middle of the page on average.
714 : : */
1972 pg@bowt.ie 715 :CBC 483 : itemid = PageGetItemId(state->origpage, OffsetNumberPrev(state->newitemoff));
716 : 483 : tup = (IndexTuple) PageGetItem(state->origpage, itemid);
717 : : /* Do cheaper test first */
2019 718 [ + - ]: 483 : if (BTreeTupleIsPosting(tup) ||
719 [ + + ]: 483 : !_bt_adjacenthtid(&tup->t_tid, &state->newitem->t_tid))
2357 720 : 359 : return false;
721 : : /* Check same conditions as rightmost item case, too */
722 : 124 : keepnatts = _bt_keep_natts_fast(state->rel, tup, state->newitem);
723 : :
724 [ + + + - ]: 124 : if (keepnatts > 1 && keepnatts <= nkeyatts)
725 : : {
726 : 107 : double interp = (double) state->newitemoff / ((double) maxoff + 1);
727 : 107 : double leaffillfactormult = (double) leaffillfactor / 100.0;
728 : :
729 : : /*
730 : : * Don't allow caller to split after a new item when it will result in
731 : : * a split point to the right of the point that a leaf fillfactor
732 : : * split would use -- have caller apply leaf fillfactor instead
733 : : */
734 : 107 : *usemult = interp > leaffillfactormult;
735 : :
736 : 107 : return true;
737 : : }
738 : :
739 : 17 : return false;
740 : : }
741 : :
742 : : /*
743 : : * Subroutine for determining if two heap TIDS are "adjacent".
744 : : *
745 : : * Adjacent means that the high TID is very likely to have been inserted into
746 : : * heap relation immediately after the low TID, probably during the current
747 : : * transaction.
748 : : */
749 : : static bool
750 : 483 : _bt_adjacenthtid(ItemPointer lowhtid, ItemPointer highhtid)
751 : : {
752 : : BlockNumber lowblk,
753 : : highblk;
754 : :
755 : 483 : lowblk = ItemPointerGetBlockNumber(lowhtid);
756 : 483 : highblk = ItemPointerGetBlockNumber(highhtid);
757 : :
758 : : /* Make optimistic assumption of adjacency when heap blocks match */
759 [ + + ]: 483 : if (lowblk == highblk)
760 : 124 : return true;
761 : :
762 : : /* When heap block one up, second offset should be FirstOffsetNumber */
763 [ + + - + ]: 462 : if (lowblk + 1 == highblk &&
764 : 103 : ItemPointerGetOffsetNumber(highhtid) == FirstOffsetNumber)
2357 pg@bowt.ie 765 :LBC (2) : return true;
766 : :
2357 pg@bowt.ie 767 :CBC 359 : return false;
768 : : }
769 : :
770 : : /*
771 : : * Subroutine to find the "best" split point among candidate split points.
772 : : * The best split point is the split point with the lowest penalty among split
773 : : * points that fall within current/final split interval. Penalty is an
774 : : * abstract score, with a definition that varies depending on whether we're
775 : : * splitting a leaf page or an internal page. See _bt_split_penalty() for
776 : : * details.
777 : : *
778 : : * "perfectpenalty" is assumed to be the lowest possible penalty among
779 : : * candidate split points. This allows us to return early without wasting
780 : : * cycles on calculating the first differing attribute for all candidate
781 : : * splits when that clearly cannot improve our choice (or when we only want a
782 : : * minimally distinguishing split point, and don't want to make the split any
783 : : * more unbalanced than is necessary).
784 : : *
785 : : * We return the index of the first existing tuple that should go on the right
786 : : * page, plus a boolean indicating if new item is on left of split point.
787 : : */
788 : : static OffsetNumber
2245 789 : 11348 : _bt_bestsplitloc(FindSplitData *state, int perfectpenalty,
790 : : bool *newitemonleft, FindSplitStrat strategy)
791 : : {
792 : : int bestpenalty,
793 : : lowsplit;
2362 794 : 11348 : int highsplit = Min(state->interval, state->nsplits);
795 : : SplitPoint *final;
796 : :
797 : 11348 : bestpenalty = INT_MAX;
798 : 11348 : lowsplit = 0;
799 [ + + ]: 25132 : for (int i = lowsplit; i < highsplit; i++)
800 : : {
801 : : int penalty;
802 : :
803 : 25113 : penalty = _bt_split_penalty(state, state->splits + i);
804 : :
805 [ + + ]: 25113 : if (penalty < bestpenalty)
806 : : {
807 : 14302 : bestpenalty = penalty;
808 : 14302 : lowsplit = i;
809 : : }
810 : :
1970 811 [ + + ]: 25113 : if (penalty <= perfectpenalty)
812 : 11329 : break;
813 : : }
814 : :
2245 815 : 11348 : final = &state->splits[lowsplit];
816 : :
817 : : /*
818 : : * There is a risk that the "many duplicates" strategy will repeatedly do
819 : : * the wrong thing when there are monotonically decreasing insertions to
820 : : * the right of a large group of duplicates. Repeated splits could leave
821 : : * a succession of right half pages with free space that can never be
822 : : * used. This must be avoided.
823 : : *
824 : : * Consider the example of the leftmost page in a single integer attribute
825 : : * NULLS FIRST index which is almost filled with NULLs. Monotonically
826 : : * decreasing integer insertions might cause the same leftmost page to
827 : : * split repeatedly at the same point. Each split derives its new high
828 : : * key from the lowest current value to the immediate right of the large
829 : : * group of NULLs, which will always be higher than all future integer
830 : : * insertions, directing all future integer insertions to the same
831 : : * leftmost page.
832 : : */
833 [ + + + + ]: 11348 : if (strategy == SPLIT_MANY_DUPLICATES && !state->is_rightmost &&
1972 834 [ + + - + ]: 65 : !final->newitemonleft && final->firstrightoff >= state->newitemoff &&
1964 pg@bowt.ie 835 [ # # ]:UBC 0 : final->firstrightoff < state->newitemoff + 9)
836 : : {
837 : : /*
838 : : * Avoid the problem by performing a 50:50 split when the new item is
839 : : * just to the right of the would-be "many duplicates" split point.
840 : : * (Note that the test used for an insert that is "just to the right"
841 : : * of the split point is conservative.)
842 : : */
2245 843 : 0 : final = &state->splits[0];
844 : : }
845 : :
2245 pg@bowt.ie 846 :CBC 11348 : *newitemonleft = final->newitemonleft;
1972 847 : 11348 : return final->firstrightoff;
848 : : }
849 : :
850 : : #define LEAF_SPLIT_DISTANCE 0.050
851 : : #define INTERNAL_SPLIT_DISTANCE 0.075
852 : :
853 : : /*
854 : : * Return a split interval to use for the default strategy. This is a limit
855 : : * on the number of candidate split points to give further consideration to.
856 : : * Only a fraction of all candidate splits points (those located at the start
857 : : * of the now-sorted splits array) fall within the split interval. Split
858 : : * interval is applied within _bt_bestsplitloc().
859 : : *
860 : : * Split interval represents an acceptable range of split points -- those that
861 : : * have leftfree and rightfree values that are acceptably balanced. The final
862 : : * split point chosen is the split point with the lowest "penalty" among split
863 : : * points in this split interval (unless we change our entire strategy, in
864 : : * which case the interval also changes -- see _bt_strategy()).
865 : : *
866 : : * The "Prefix B-Trees" paper calls split interval sigma l for leaf splits,
867 : : * and sigma b for internal ("branch") splits. It's hard to provide a
868 : : * theoretical justification for the size of the split interval, though it's
869 : : * clear that a small split interval can make tuples on level L+1 much smaller
870 : : * on average, without noticeably affecting space utilization on level L.
871 : : * (Note that the way that we calculate split interval might need to change if
872 : : * suffix truncation is taught to truncate tuples "within" the last
873 : : * attribute/datum for data types like text, which is more or less how it is
874 : : * assumed to work in the paper.)
875 : : */
876 : : static int
1964 877 : 11348 : _bt_defaultinterval(FindSplitData *state)
878 : : {
879 : : SplitPoint *spaceoptimal;
880 : : int16 tolerance,
881 : : lowleftfree,
882 : : lowrightfree,
883 : : highleftfree,
884 : : highrightfree;
885 : :
886 : : /*
887 : : * Determine leftfree and rightfree values that are higher and lower than
888 : : * we're willing to tolerate. Note that the final split interval will be
889 : : * about 10% of nsplits in the common case where all non-pivot tuples
890 : : * (data items) from a leaf page are uniformly sized. We're a bit more
891 : : * aggressive when splitting internal pages.
892 : : */
893 [ + + ]: 11348 : if (state->is_leaf)
894 : 11191 : tolerance = state->olddataitemstotal * LEAF_SPLIT_DISTANCE;
895 : : else
896 : 157 : tolerance = state->olddataitemstotal * INTERNAL_SPLIT_DISTANCE;
897 : :
898 : : /* First candidate split point is the most evenly balanced */
899 : 11348 : spaceoptimal = state->splits;
900 : 11348 : lowleftfree = spaceoptimal->leftfree - tolerance;
901 : 11348 : lowrightfree = spaceoptimal->rightfree - tolerance;
902 : 11348 : highleftfree = spaceoptimal->leftfree + tolerance;
903 : 11348 : highrightfree = spaceoptimal->rightfree + tolerance;
904 : :
905 : : /*
906 : : * Iterate through split points, starting from the split immediately after
907 : : * 'spaceoptimal'. Find the first split point that divides free space so
908 : : * unevenly that including it in the split interval would be unacceptable.
909 : : */
910 [ + - ]: 345113 : for (int i = 1; i < state->nsplits; i++)
911 : : {
912 : 345113 : SplitPoint *split = state->splits + i;
913 : :
914 : : /* Cannot use curdelta here, since its value is often weighted */
915 [ + + + + ]: 345113 : if (split->leftfree < lowleftfree || split->rightfree < lowrightfree ||
916 [ + + + + ]: 334138 : split->leftfree > highleftfree || split->rightfree > highrightfree)
917 : 11348 : return i;
918 : : }
919 : :
1964 pg@bowt.ie 920 :UBC 0 : return state->nsplits;
921 : : }
922 : :
923 : : /*
924 : : * Subroutine to decide whether split should use default strategy/initial
925 : : * split interval, or whether it should finish splitting the page using
926 : : * alternative strategies (this is only possible with leaf pages).
927 : : *
928 : : * Caller uses alternative strategy (or sticks with default strategy) based
929 : : * on how *strategy is set here. Return value is "perfect penalty", which is
930 : : * passed to _bt_bestsplitloc() as a final constraint on how far caller is
931 : : * willing to go to avoid appending a heap TID when using the many duplicates
932 : : * strategy (it also saves _bt_bestsplitloc() useless cycles).
933 : : */
934 : : static int
2362 pg@bowt.ie 935 :CBC 11348 : _bt_strategy(FindSplitData *state, SplitPoint *leftpage,
936 : : SplitPoint *rightpage, FindSplitStrat *strategy)
937 : : {
938 : : IndexTuple leftmost,
939 : : rightmost;
940 : : SplitPoint *leftinterval,
941 : : *rightinterval;
942 : : int perfectpenalty;
943 : 11348 : int indnkeyatts = IndexRelationGetNumberOfKeyAttributes(state->rel);
944 : :
945 : : /* Assume that alternative strategy won't be used for now */
946 : 11348 : *strategy = SPLIT_DEFAULT;
947 : :
948 : : /*
949 : : * Use smallest observed firstright item size for entire page (actually,
950 : : * entire imaginary version of page that includes newitem) as perfect
951 : : * penalty on internal pages. This can save cycles in the common case
952 : : * where most or all splits (not just splits within interval) have
953 : : * firstright tuples that are the same size.
954 : : */
955 [ + + ]: 11348 : if (!state->is_leaf)
956 : 157 : return state->minfirstrightsz;
957 : :
958 : : /*
959 : : * Use leftmost and rightmost tuples from leftmost and rightmost splits in
960 : : * current split interval
961 : : */
962 : 11191 : _bt_interval_edges(state, &leftinterval, &rightinterval);
963 : 11191 : leftmost = _bt_split_lastleft(state, leftinterval);
964 : 11191 : rightmost = _bt_split_firstright(state, rightinterval);
965 : :
966 : : /*
967 : : * If initial split interval can produce a split point that will at least
968 : : * avoid appending a heap TID in new high key, we're done. Finish split
969 : : * with default strategy and initial split interval.
970 : : */
971 : 11191 : perfectpenalty = _bt_keep_natts_fast(state->rel, leftmost, rightmost);
972 [ + + ]: 11191 : if (perfectpenalty <= indnkeyatts)
973 : 10942 : return perfectpenalty;
974 : :
975 : : /*
976 : : * Work out how caller should finish split when even their "perfect"
977 : : * penalty for initial/default split interval indicates that the interval
978 : : * does not contain even a single split that avoids appending a heap TID.
979 : : *
980 : : * Use the leftmost split's lastleft tuple and the rightmost split's
981 : : * firstright tuple to assess every possible split.
982 : : */
983 : 249 : leftmost = _bt_split_lastleft(state, leftpage);
984 : 249 : rightmost = _bt_split_firstright(state, rightpage);
985 : :
986 : : /*
987 : : * If page (including new item) has many duplicates but is not entirely
988 : : * full of duplicates, a many duplicates strategy split will be performed.
989 : : * If page is entirely full of duplicates, a single value strategy split
990 : : * will be performed.
991 : : */
992 : 249 : perfectpenalty = _bt_keep_natts_fast(state->rel, leftmost, rightmost);
993 [ + + ]: 249 : if (perfectpenalty <= indnkeyatts)
994 : : {
995 : 71 : *strategy = SPLIT_MANY_DUPLICATES;
996 : :
997 : : /*
998 : : * Many duplicates strategy should split at either side the group of
999 : : * duplicates that enclose the delta-optimal split point. Return
1000 : : * indnkeyatts rather than the true perfect penalty to make that
1001 : : * happen. (If perfectpenalty was returned here then low cardinality
1002 : : * composite indexes could have continual unbalanced splits.)
1003 : : *
1004 : : * Note that caller won't go through with a many duplicates split in
1005 : : * rare cases where it looks like there are ever-decreasing insertions
1006 : : * to the immediate right of the split point. This must happen just
1007 : : * before a final decision is made, within _bt_bestsplitloc().
1008 : : */
1009 : 71 : return indnkeyatts;
1010 : : }
1011 : :
1012 : : /*
1013 : : * Single value strategy is only appropriate with ever-increasing heap
1014 : : * TIDs; otherwise, original default strategy split should proceed to
1015 : : * avoid pathological performance. Use page high key to infer if this is
1016 : : * the rightmost page among pages that store the same duplicate value.
1017 : : * This should not prevent insertions of heap TIDs that are slightly out
1018 : : * of order from using single value strategy, since that's expected with
1019 : : * concurrent inserters of the same duplicate value.
1020 : : */
1021 [ + + ]: 178 : else if (state->is_rightmost)
1022 : 108 : *strategy = SPLIT_SINGLE_VALUE;
1023 : : else
1024 : : {
1025 : : ItemId itemid;
1026 : : IndexTuple hikey;
1027 : :
1972 1028 : 70 : itemid = PageGetItemId(state->origpage, P_HIKEY);
1029 : 70 : hikey = (IndexTuple) PageGetItem(state->origpage, itemid);
2362 1030 : 70 : perfectpenalty = _bt_keep_natts_fast(state->rel, hikey,
1031 : : state->newitem);
1032 [ + + ]: 70 : if (perfectpenalty <= indnkeyatts)
1033 : 19 : *strategy = SPLIT_SINGLE_VALUE;
1034 : : else
1035 : : {
1036 : : /*
1037 : : * Have caller finish split using default strategy, since page
1038 : : * does not appear to be the rightmost page for duplicates of the
1039 : : * value the page is filled with
1040 : : */
1041 : : }
1042 : : }
1043 : :
1044 : 178 : return perfectpenalty;
1045 : : }
1046 : :
1047 : : /*
1048 : : * Subroutine to locate leftmost and rightmost splits for current/default
1049 : : * split interval. Note that it will be the same split iff there is only one
1050 : : * split in interval.
1051 : : */
1052 : : static void
1053 : 11191 : _bt_interval_edges(FindSplitData *state, SplitPoint **leftinterval,
1054 : : SplitPoint **rightinterval)
1055 : : {
1056 : 11191 : int highsplit = Min(state->interval, state->nsplits);
1057 : : SplitPoint *deltaoptimal;
1058 : :
1059 : 11191 : deltaoptimal = state->splits;
1060 : 11191 : *leftinterval = NULL;
1061 : 11191 : *rightinterval = NULL;
1062 : :
1063 : : /*
1064 : : * Delta is an absolute distance to optimal split point, so both the
1065 : : * leftmost and rightmost split point will usually be at the end of the
1066 : : * array
1067 : : */
1068 [ + - ]: 22637 : for (int i = highsplit - 1; i >= 0; i--)
1069 : : {
1070 : 22637 : SplitPoint *distant = state->splits + i;
1071 : :
1972 1072 [ + + ]: 22637 : if (distant->firstrightoff < deltaoptimal->firstrightoff)
1073 : : {
2362 1074 [ + + ]: 11117 : if (*leftinterval == NULL)
1075 : 10940 : *leftinterval = distant;
1076 : : }
1972 1077 [ + + ]: 11520 : else if (distant->firstrightoff > deltaoptimal->firstrightoff)
1078 : : {
2362 1079 [ + + ]: 11254 : if (*rightinterval == NULL)
1080 : 10979 : *rightinterval = distant;
1081 : : }
1082 [ + + + + ]: 266 : else if (!distant->newitemonleft && deltaoptimal->newitemonleft)
1083 : : {
1084 : : /*
1085 : : * "incoming tuple will become firstright" (distant) is to the
1086 : : * left of "incoming tuple will become lastleft" (delta-optimal)
1087 : : */
1972 1088 [ - + ]: 1 : Assert(distant->firstrightoff == state->newitemoff);
2362 1089 [ - + ]: 1 : if (*leftinterval == NULL)
2362 pg@bowt.ie 1090 :LBC (1) : *leftinterval = distant;
1091 : : }
2362 pg@bowt.ie 1092 [ + + + + ]:CBC 265 : else if (distant->newitemonleft && !deltaoptimal->newitemonleft)
1093 : : {
1094 : : /*
1095 : : * "incoming tuple will become lastleft" (distant) is to the right
1096 : : * of "incoming tuple will become firstright" (delta-optimal)
1097 : : */
1972 1098 [ - + ]: 6 : Assert(distant->firstrightoff == state->newitemoff);
2362 1099 [ + + ]: 6 : if (*rightinterval == NULL)
1100 : 3 : *rightinterval = distant;
1101 : : }
1102 : : else
1103 : : {
1104 : : /* There was only one or two splits in initial split interval */
1105 [ - + ]: 259 : Assert(distant == deltaoptimal);
1106 [ + + ]: 259 : if (*leftinterval == NULL)
1107 : 251 : *leftinterval = distant;
1108 [ + + ]: 259 : if (*rightinterval == NULL)
1109 : 209 : *rightinterval = distant;
1110 : : }
1111 : :
1112 [ + + + + ]: 22637 : if (*leftinterval && *rightinterval)
1113 : 11191 : return;
1114 : : }
1115 : :
2362 pg@bowt.ie 1116 :UBC 0 : Assert(false);
1117 : : }
1118 : :
1119 : : /*
1120 : : * Subroutine to find penalty for caller's candidate split point.
1121 : : *
1122 : : * On leaf pages, penalty is the attribute number that distinguishes each side
1123 : : * of a split. It's the last attribute that needs to be included in new high
1124 : : * key for left page. It can be greater than the number of key attributes in
1125 : : * cases where a heap TID will need to be appended during truncation.
1126 : : *
1127 : : * On internal pages, penalty is simply the size of the firstright tuple for
1128 : : * the split (including line pointer overhead). This tuple will become the
1129 : : * new high key for the left page.
1130 : : */
1131 : : static inline int
2362 pg@bowt.ie 1132 :CBC 25113 : _bt_split_penalty(FindSplitData *state, SplitPoint *split)
1133 : : {
1134 : : IndexTuple lastleft;
1135 : : IndexTuple firstright;
1136 : :
1137 [ + + ]: 25113 : if (!state->is_leaf)
1138 : : {
1139 : : ItemId itemid;
1140 : :
1141 [ + - ]: 165 : if (!split->newitemonleft &&
1972 1142 [ + + ]: 165 : split->firstrightoff == state->newitemoff)
2362 1143 : 18 : return state->newitemsz;
1144 : :
1972 1145 : 147 : itemid = PageGetItemId(state->origpage, split->firstrightoff);
1146 : :
2362 1147 : 147 : return MAXALIGN(ItemIdGetLength(itemid)) + sizeof(ItemIdData);
1148 : : }
1149 : :
1972 1150 : 24948 : lastleft = _bt_split_lastleft(state, split);
1151 : 24948 : firstright = _bt_split_firstright(state, split);
1152 : :
1153 : 24948 : return _bt_keep_natts_fast(state->rel, lastleft, firstright);
1154 : : }
1155 : :
1156 : : /*
1157 : : * Subroutine to get a lastleft IndexTuple for a split point
1158 : : */
1159 : : static inline IndexTuple
2362 1160 : 36388 : _bt_split_lastleft(FindSplitData *state, SplitPoint *split)
1161 : : {
1162 : : ItemId itemid;
1163 : :
1972 1164 [ + + + + ]: 36388 : if (split->newitemonleft && split->firstrightoff == state->newitemoff)
2362 1165 : 260 : return state->newitem;
1166 : :
1972 1167 : 36128 : itemid = PageGetItemId(state->origpage,
1168 : 36128 : OffsetNumberPrev(split->firstrightoff));
1169 : 36128 : return (IndexTuple) PageGetItem(state->origpage, itemid);
1170 : : }
1171 : :
1172 : : /*
1173 : : * Subroutine to get a firstright IndexTuple for a split point
1174 : : */
1175 : : static inline IndexTuple
2362 1176 : 36388 : _bt_split_firstright(FindSplitData *state, SplitPoint *split)
1177 : : {
1178 : : ItemId itemid;
1179 : :
1972 1180 [ + + + + ]: 36388 : if (!split->newitemonleft && split->firstrightoff == state->newitemoff)
2362 1181 : 108 : return state->newitem;
1182 : :
1972 1183 : 36280 : itemid = PageGetItemId(state->origpage, split->firstrightoff);
1184 : 36280 : return (IndexTuple) PageGetItem(state->origpage, itemid);
1185 : : }
|