Age Owner Branch data TLA Line data Source code
1 : : /*-------------------------------------------------------------------------
2 : : *
3 : : * fsmpage.c
4 : : * routines to search and manipulate one FSM page.
5 : : *
6 : : *
7 : : * Portions Copyright (c) 1996-2026, PostgreSQL Global Development Group
8 : : * Portions Copyright (c) 1994, Regents of the University of California
9 : : *
10 : : * IDENTIFICATION
11 : : * src/backend/storage/freespace/fsmpage.c
12 : : *
13 : : * NOTES:
14 : : *
15 : : * The public functions in this file form an API that hides the internal
16 : : * structure of a FSM page. This allows freespace.c to treat each FSM page
17 : : * as a black box with SlotsPerPage "slots". fsm_set_avail() and
18 : : * fsm_get_avail() let you get/set the value of a slot, and
19 : : * fsm_search_avail() lets you search for a slot with value >= X.
20 : : *
21 : : *-------------------------------------------------------------------------
22 : : */
23 : : #include "postgres.h"
24 : :
25 : : #include "storage/bufmgr.h"
26 : : #include "storage/fsm_internals.h"
27 : :
28 : : /* Macros to navigate the tree within a page. Root has index zero. */
29 : : #define leftchild(x) (2 * (x) + 1)
30 : : #define rightchild(x) (2 * (x) + 2)
31 : : #define parentof(x) (((x) - 1) / 2)
32 : :
33 : : /*
34 : : * Find right neighbor of x, wrapping around within the level
35 : : */
36 : : static int
6368 tgl@sss.pgh.pa.us 37 :CBC 69889 : rightneighbor(int x)
38 : : {
39 : : /*
40 : : * Move right. This might wrap around, stepping to the leftmost node at
41 : : * the next level.
42 : : */
6375 heikki.linnakangas@i 43 : 69889 : x++;
44 : :
45 : : /*
46 : : * Check if we stepped to the leftmost node at next level, and correct if
47 : : * so. The leftmost nodes at each level are numbered x = 2^level - 1, so
48 : : * check if (x + 1) is a power of two, using a standard
49 : : * twos-complement-arithmetic trick.
50 : : */
51 [ + + ]: 69889 : if (((x + 1) & x) == 0)
52 : 3718 : x = parentof(x);
53 : :
54 : 69889 : return x;
55 : : }
56 : :
57 : : /*
58 : : * Sets the value of a slot on page. Returns true if the page was modified.
59 : : *
60 : : * The caller must hold an exclusive lock on the page.
61 : : */
62 : : bool
63 : 517168 : fsm_set_avail(Page page, int slot, uint8 value)
64 : : {
6121 bruce@momjian.us 65 : 517168 : int nodeno = NonLeafNodesPerPage + slot;
66 : 517168 : FSMPage fsmpage = (FSMPage) PageGetContents(page);
67 : : uint8 oldvalue;
68 : :
6375 heikki.linnakangas@i 69 [ - + ]: 517168 : Assert(slot < LeafNodesPerPage);
70 : :
71 : 517168 : oldvalue = fsmpage->fp_nodes[nodeno];
72 : :
73 : : /* If the value hasn't changed, we don't need to do anything */
74 [ + + + - ]: 517168 : if (oldvalue == value && value <= fsmpage->fp_nodes[0])
75 : 101326 : return false;
76 : :
77 : 415842 : fsmpage->fp_nodes[nodeno] = value;
78 : :
79 : : /*
80 : : * Propagate up, until we hit the root or a node that doesn't need to be
81 : : * updated.
82 : : */
83 : : do
84 : : {
6121 bruce@momjian.us 85 : 3651702 : uint8 newvalue = 0;
86 : : int lchild;
87 : : int rchild;
88 : :
6375 heikki.linnakangas@i 89 : 3651702 : nodeno = parentof(nodeno);
90 : 3651702 : lchild = leftchild(nodeno);
91 : 3651702 : rchild = lchild + 1;
92 : :
93 : 3651702 : newvalue = fsmpage->fp_nodes[lchild];
94 [ + + ]: 3651702 : if (rchild < NodesPerPage)
95 : 3651700 : newvalue = Max(newvalue,
96 : : fsmpage->fp_nodes[rchild]);
97 : :
98 : 3651702 : oldvalue = fsmpage->fp_nodes[nodeno];
99 [ + + ]: 3651702 : if (oldvalue == newvalue)
100 : 140183 : break;
101 : :
102 : 3511519 : fsmpage->fp_nodes[nodeno] = newvalue;
103 [ + + ]: 3511519 : } while (nodeno > 0);
104 : :
105 : : /*
106 : : * sanity check: if the new value is (still) higher than the value at the
107 : : * top, the tree is corrupt. If so, rebuild.
108 : : */
109 [ - + ]: 415842 : if (value > fsmpage->fp_nodes[0])
6375 heikki.linnakangas@i 110 :UBC 0 : fsm_rebuild_page(page);
111 : :
6375 heikki.linnakangas@i 112 :CBC 415842 : return true;
113 : : }
114 : :
115 : : /*
116 : : * Returns the value of given slot on page.
117 : : *
118 : : * Since this is just a read-only access of a single byte, the page doesn't
119 : : * need to be locked.
120 : : */
121 : : uint8
122 : 1698443 : fsm_get_avail(Page page, int slot)
123 : : {
6121 bruce@momjian.us 124 : 1698443 : FSMPage fsmpage = (FSMPage) PageGetContents(page);
125 : :
6368 tgl@sss.pgh.pa.us 126 [ - + ]: 1698443 : Assert(slot < LeafNodesPerPage);
127 : :
6375 heikki.linnakangas@i 128 : 1698443 : return fsmpage->fp_nodes[NonLeafNodesPerPage + slot];
129 : : }
130 : :
131 : : /*
132 : : * Returns the value at the root of a page.
133 : : *
134 : : * Since this is just a read-only access of a single byte, the page doesn't
135 : : * need to be locked.
136 : : */
137 : : uint8
138 : 143385 : fsm_get_max_avail(Page page)
139 : : {
6121 bruce@momjian.us 140 : 143385 : FSMPage fsmpage = (FSMPage) PageGetContents(page);
141 : :
6375 heikki.linnakangas@i 142 : 143385 : return fsmpage->fp_nodes[0];
143 : : }
144 : :
145 : : /*
146 : : * Searches for a slot with category at least minvalue.
147 : : * Returns slot number, or -1 if none found.
148 : : *
149 : : * The caller must hold at least a shared lock on the page, and this
150 : : * function can unlock and lock the page again in exclusive mode if it
151 : : * needs to be updated. exclusive_lock_held should be set to true if the
152 : : * caller is already holding an exclusive lock, to avoid extra work.
153 : : *
154 : : * If advancenext is false, fp_next_slot is set to point to the returned
155 : : * slot, and if it's true, to the slot after the returned slot.
156 : : */
157 : : int
158 : 278432 : fsm_search_avail(Buffer buf, uint8 minvalue, bool advancenext,
159 : : bool exclusive_lock_held)
160 : : {
3616 kgrittn@postgresql.o 161 : 278432 : Page page = BufferGetPage(buf);
6121 bruce@momjian.us 162 : 278432 : FSMPage fsmpage = (FSMPage) PageGetContents(page);
163 : : int nodeno;
164 : : int target;
165 : : uint16 slot;
166 : :
167 : 278432 : restart:
168 : :
169 : : /*
170 : : * Check the root first, and exit quickly if there's no leaf with enough
171 : : * free space
172 : : */
6375 heikki.linnakangas@i 173 [ + + ]: 278432 : if (fsmpage->fp_nodes[0] < minvalue)
174 : 222428 : return -1;
175 : :
176 : : /*
177 : : * Start search using fp_next_slot. It's just a hint, so check that it's
178 : : * sane. (This also handles wrapping around when the prior call returned
179 : : * the last slot on the page.)
180 : : */
181 : 56004 : target = fsmpage->fp_next_slot;
182 [ + - - + ]: 56004 : if (target < 0 || target >= LeafNodesPerPage)
6375 heikki.linnakangas@i 183 :UBC 0 : target = 0;
6375 heikki.linnakangas@i 184 :CBC 56004 : target += NonLeafNodesPerPage;
185 : :
186 : : /*----------
187 : : * Start the search from the target slot. At every step, move one
188 : : * node to the right, then climb up to the parent. Stop when we reach
189 : : * a node with enough free space (as we must, since the root has enough
190 : : * space).
191 : : *
192 : : * The idea is to gradually expand our "search triangle", that is, all
193 : : * nodes covered by the current node, and to be sure we search to the
194 : : * right from the start point. At the first step, only the target slot
195 : : * is examined. When we move up from a left child to its parent, we are
196 : : * adding the right-hand subtree of that parent to the search triangle.
197 : : * When we move right then up from a right child, we are dropping the
198 : : * current search triangle (which we know doesn't contain any suitable
199 : : * page) and instead looking at the next-larger-size triangle to its
200 : : * right. So we never look left from our original start point, and at
201 : : * each step the size of the search triangle doubles, ensuring it takes
202 : : * only log2(N) work to search N pages.
203 : : *
204 : : * The "move right" operation will wrap around if it hits the right edge
205 : : * of the tree, so the behavior is still good if we start near the right.
206 : : * Note also that the move-and-climb behavior ensures that we can't end
207 : : * up on one of the missing nodes at the right of the leaf level.
208 : : *
209 : : * For example, consider this tree:
210 : : *
211 : : * 7
212 : : * 7 6
213 : : * 5 7 6 5
214 : : * 4 5 5 7 2 6 5 2
215 : : * T
216 : : *
217 : : * Assume that the target node is the node indicated by the letter T,
218 : : * and we're searching for a node with value of 6 or higher. The search
219 : : * begins at T. At the first iteration, we move to the right, then to the
220 : : * parent, arriving at the rightmost 5. At the second iteration, we move
221 : : * to the right, wrapping around, then climb up, arriving at the 7 on the
222 : : * third level. 7 satisfies our search, so we descend down to the bottom,
223 : : * following the path of sevens. This is in fact the first suitable page
224 : : * to the right of (allowing for wraparound) our start point.
225 : : *----------
226 : : */
227 : 56004 : nodeno = target;
228 [ + + ]: 125893 : while (nodeno > 0)
229 : : {
230 [ + + ]: 122175 : if (fsmpage->fp_nodes[nodeno] >= minvalue)
231 : 52286 : break;
232 : :
233 : : /*
234 : : * Move to the right, wrapping around on same level if necessary, then
235 : : * climb up.
236 : : */
6368 tgl@sss.pgh.pa.us 237 : 69889 : nodeno = parentof(rightneighbor(nodeno));
238 : : }
239 : :
240 : : /*
241 : : * We're now at a node with enough free space, somewhere in the middle of
242 : : * the tree. Descend to the bottom, following a path with enough free
243 : : * space, preferring to move left if there's a choice.
244 : : */
6375 heikki.linnakangas@i 245 [ + + ]: 125893 : while (nodeno < NonLeafNodesPerPage)
246 : : {
6121 bruce@momjian.us 247 : 69889 : int childnodeno = leftchild(nodeno);
248 : :
6304 tgl@sss.pgh.pa.us 249 [ + - ]: 69889 : if (childnodeno < NodesPerPage &&
250 [ + + ]: 69889 : fsmpage->fp_nodes[childnodeno] >= minvalue)
251 : : {
252 : 50504 : nodeno = childnodeno;
253 : 50504 : continue;
254 : : }
255 : 19385 : childnodeno++; /* point to right child */
256 [ + - ]: 19385 : if (childnodeno < NodesPerPage &&
257 [ + - ]: 19385 : fsmpage->fp_nodes[childnodeno] >= minvalue)
258 : : {
259 : 19385 : nodeno = childnodeno;
260 : : }
261 : : else
262 : : {
263 : : /*
264 : : * Oops. The parent node promised that either left or right child
265 : : * has enough space, but neither actually did. This can happen in
266 : : * case of a "torn page", IOW if we crashed earlier while writing
267 : : * the page to disk, and only part of the page made it to disk.
268 : : *
269 : : * Fix the corruption and restart.
270 : : */
271 : : RelFileLocator rlocator;
272 : : ForkNumber forknum;
273 : : BlockNumber blknum;
274 : :
1348 rhaas@postgresql.org 275 :UBC 0 : BufferGetTag(buf, &rlocator, &forknum, &blknum);
1264 276 [ # # ]: 0 : elog(DEBUG1, "fixing corrupt FSM block %u, relation %u/%u/%u",
277 : : blknum, rlocator.spcOid, rlocator.dbOid, rlocator.relNumber);
278 : :
279 : : /* make sure we hold an exclusive lock */
6375 heikki.linnakangas@i 280 [ # # ]: 0 : if (!exclusive_lock_held)
281 : : {
282 : 0 : LockBuffer(buf, BUFFER_LOCK_UNLOCK);
283 : 0 : LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);
284 : 0 : exclusive_lock_held = true;
285 : : }
286 : 0 : fsm_rebuild_page(page);
4654 jdavis@postgresql.or 287 : 0 : MarkBufferDirtyHint(buf, false);
6375 heikki.linnakangas@i 288 : 0 : goto restart;
289 : : }
290 : : }
291 : :
292 : : /* We're now at the bottom level, at a node with enough space. */
6375 heikki.linnakangas@i 293 :CBC 56004 : slot = nodeno - NonLeafNodesPerPage;
294 : :
295 : : /*
296 : : * Update the next-target pointer. Note that we do this even if we're only
297 : : * holding a shared lock, on the grounds that it's better to use a shared
298 : : * lock and get a garbled next pointer every now and then, than take the
299 : : * concurrency hit of an exclusive lock.
300 : : *
301 : : * Without an exclusive lock, we need to use the hint bit infrastructure
302 : : * to be allowed to modify the page.
303 : : *
304 : : * Wrap-around is handled at the beginning of this function.
305 : : */
5 andres@anarazel.de 306 [ + + + + ]:GNC 56004 : if (exclusive_lock_held || BufferBeginSetHintBits(buf))
307 : : {
308 : 56001 : fsmpage->fp_next_slot = slot + (advancenext ? 1 : 0);
309 : :
310 [ + + ]: 56001 : if (!exclusive_lock_held)
311 : 40324 : BufferFinishSetHintBits(buf, false, false);
312 : : }
313 : :
6375 heikki.linnakangas@i 314 :CBC 56004 : return slot;
315 : : }
316 : :
317 : : /*
318 : : * Sets the available space to zero for all slots numbered >= nslots.
319 : : * Returns true if the page was modified.
320 : : */
321 : : bool
322 : 126 : fsm_truncate_avail(Page page, int nslots)
323 : : {
6121 bruce@momjian.us 324 : 126 : FSMPage fsmpage = (FSMPage) PageGetContents(page);
325 : : uint8 *ptr;
326 : 126 : bool changed = false;
327 : :
6375 heikki.linnakangas@i 328 [ + - - + ]: 126 : Assert(nslots >= 0 && nslots < LeafNodesPerPage);
329 : :
330 : : /* Clear all truncated leaf nodes */
331 : 126 : ptr = &fsmpage->fp_nodes[NonLeafNodesPerPage + nslots];
332 [ + + ]: 506026 : for (; ptr < &fsmpage->fp_nodes[NodesPerPage]; ptr++)
333 : : {
334 [ + + ]: 505900 : if (*ptr != 0)
335 : 2744 : changed = true;
336 : 505900 : *ptr = 0;
337 : : }
338 : :
339 : : /* Fix upper nodes. */
340 [ + + ]: 126 : if (changed)
341 : 106 : fsm_rebuild_page(page);
342 : :
343 : 126 : return changed;
344 : : }
345 : :
346 : : /*
347 : : * Reconstructs the upper levels of a page. Returns true if the page
348 : : * was modified.
349 : : */
350 : : bool
351 : 106 : fsm_rebuild_page(Page page)
352 : : {
6121 bruce@momjian.us 353 : 106 : FSMPage fsmpage = (FSMPage) PageGetContents(page);
354 : 106 : bool changed = false;
355 : : int nodeno;
356 : :
357 : : /*
358 : : * Start from the lowest non-leaf level, at last node, working our way
359 : : * backwards, through all non-leaf nodes at all levels, up to the root.
360 : : */
6375 heikki.linnakangas@i 361 [ + + ]: 434176 : for (nodeno = NonLeafNodesPerPage - 1; nodeno >= 0; nodeno--)
362 : : {
6121 bruce@momjian.us 363 : 434070 : int lchild = leftchild(nodeno);
364 : 434070 : int rchild = lchild + 1;
365 : 434070 : uint8 newvalue = 0;
366 : :
367 : : /* The first few nodes we examine might have zero or one child. */
6375 heikki.linnakangas@i 368 [ + + ]: 434070 : if (lchild < NodesPerPage)
369 : 432692 : newvalue = fsmpage->fp_nodes[lchild];
370 : :
371 [ + + ]: 434070 : if (rchild < NodesPerPage)
372 : 432586 : newvalue = Max(newvalue,
373 : : fsmpage->fp_nodes[rchild]);
374 : :
375 [ + + ]: 434070 : if (fsmpage->fp_nodes[nodeno] != newvalue)
376 : : {
377 : 3602 : fsmpage->fp_nodes[nodeno] = newvalue;
378 : 3602 : changed = true;
379 : : }
380 : : }
381 : :
382 : 106 : return changed;
383 : : }
|