Age Owner Branch data TLA Line data Source code
1 : : /*
2 : : * multixact_read_v18.c
3 : : *
4 : : * Functions to read multixact SLRUs from clusters of PostgreSQL version 18
5 : : * and older. In version 19, the multixid offsets were expanded from 32 to 64
6 : : * bits.
7 : : *
8 : : * Copyright (c) 2025, PostgreSQL Global Development Group
9 : : * src/bin/pg_upgrade/multixact_read_v18.c
10 : : */
11 : :
12 : : #include "postgres_fe.h"
13 : :
14 : : #include "multixact_read_v18.h"
15 : : #include "pg_upgrade.h"
16 : :
17 : : /*
18 : : * NOTE: below are a bunch of definitions that are copy-pasted from
19 : : * multixact.c from version 18. It's important that this file doesn't
20 : : * #include the new definitions with same names from "multixact_internal.h"!
21 : : *
22 : : * To further avoid confusion in the functions exposed outside this source
23 : : * file, we use MultiXactOffset32 to represent the old-style 32-bit multixid
24 : : * offsets. The new 64-bit MultiXactOffset should not be used anywhere in
25 : : * this file.
26 : : */
27 : : #ifdef MULTIXACT_INTERNAL_H
28 : : #error multixact_internal.h should not be included in multixact_read_v18.c
29 : : #endif
30 : : #define MultiXactOffset should_not_be_used
31 : :
32 : : /* We need four bytes per offset */
33 : : #define MULTIXACT_OFFSETS_PER_PAGE (BLCKSZ / sizeof(MultiXactOffset32))
34 : :
35 : : static inline int64
7 heikki.linnakangas@i 36 :UNC 0 : MultiXactIdToOffsetPage(MultiXactId multi)
37 : : {
38 : 0 : return multi / MULTIXACT_OFFSETS_PER_PAGE;
39 : : }
40 : :
41 : : static inline int
42 : 0 : MultiXactIdToOffsetEntry(MultiXactId multi)
43 : : {
44 : 0 : return multi % MULTIXACT_OFFSETS_PER_PAGE;
45 : : }
46 : :
47 : : /*
48 : : * The situation for members is a bit more complex: we store one byte of
49 : : * additional flag bits for each TransactionId. To do this without getting
50 : : * into alignment issues, we store four bytes of flags, and then the
51 : : * corresponding 4 Xids. Each such 5-word (20-byte) set we call a "group", and
52 : : * groups are stored as a whole in pages. Thus, with 8kB BLCKSZ, we keep 409 groups
53 : : * per page. This wastes 12 bytes per page, but that's OK -- simplicity (and
54 : : * performance) trumps space efficiency here.
55 : : *
56 : : * Note that the "offset" macros work with byte offset, not array indexes, so
57 : : * arithmetic must be done using "char *" pointers.
58 : : */
59 : : /* We need eight bits per xact, so one xact fits in a byte */
60 : : #define MXACT_MEMBER_BITS_PER_XACT 8
61 : : #define MXACT_MEMBER_FLAGS_PER_BYTE 1
62 : : #define MXACT_MEMBER_XACT_BITMASK ((1 << MXACT_MEMBER_BITS_PER_XACT) - 1)
63 : :
64 : : /* how many full bytes of flags are there in a group? */
65 : : #define MULTIXACT_FLAGBYTES_PER_GROUP 4
66 : : #define MULTIXACT_MEMBERS_PER_MEMBERGROUP \
67 : : (MULTIXACT_FLAGBYTES_PER_GROUP * MXACT_MEMBER_FLAGS_PER_BYTE)
68 : : /* size in bytes of a complete group */
69 : : #define MULTIXACT_MEMBERGROUP_SIZE \
70 : : (sizeof(TransactionId) * MULTIXACT_MEMBERS_PER_MEMBERGROUP + MULTIXACT_FLAGBYTES_PER_GROUP)
71 : : #define MULTIXACT_MEMBERGROUPS_PER_PAGE (BLCKSZ / MULTIXACT_MEMBERGROUP_SIZE)
72 : : #define MULTIXACT_MEMBERS_PER_PAGE \
73 : : (MULTIXACT_MEMBERGROUPS_PER_PAGE * MULTIXACT_MEMBERS_PER_MEMBERGROUP)
74 : :
75 : : /* page in which a member is to be found */
76 : : static inline int64
77 : 0 : MXOffsetToMemberPage(MultiXactOffset32 offset)
78 : : {
79 : 0 : return offset / MULTIXACT_MEMBERS_PER_PAGE;
80 : : }
81 : :
82 : : /* Location (byte offset within page) of flag word for a given member */
83 : : static inline int
84 : 0 : MXOffsetToFlagsOffset(MultiXactOffset32 offset)
85 : : {
86 : 0 : MultiXactOffset32 group = offset / MULTIXACT_MEMBERS_PER_MEMBERGROUP;
87 : 0 : int grouponpg = group % MULTIXACT_MEMBERGROUPS_PER_PAGE;
88 : 0 : int byteoff = grouponpg * MULTIXACT_MEMBERGROUP_SIZE;
89 : :
90 : 0 : return byteoff;
91 : : }
92 : :
93 : : /* Location (byte offset within page) of TransactionId of given member */
94 : : static inline int
95 : 0 : MXOffsetToMemberOffset(MultiXactOffset32 offset)
96 : : {
97 : 0 : int member_in_group = offset % MULTIXACT_MEMBERS_PER_MEMBERGROUP;
98 : :
99 : 0 : return MXOffsetToFlagsOffset(offset) +
100 : 0 : MULTIXACT_FLAGBYTES_PER_GROUP +
101 : : member_in_group * sizeof(TransactionId);
102 : : }
103 : :
104 : : static inline int
105 : 0 : MXOffsetToFlagsBitShift(MultiXactOffset32 offset)
106 : : {
107 : 0 : int member_in_group = offset % MULTIXACT_MEMBERS_PER_MEMBERGROUP;
108 : 0 : int bshift = member_in_group * MXACT_MEMBER_BITS_PER_XACT;
109 : :
110 : 0 : return bshift;
111 : : }
112 : :
113 : : /*
114 : : * Construct reader of old multixacts.
115 : : *
116 : : * Returns the malloced memory used by the all other calls in this module.
117 : : */
118 : : OldMultiXactReader *
119 : 0 : AllocOldMultiXactRead(char *pgdata, MultiXactId nextMulti,
120 : : MultiXactOffset32 nextOffset)
121 : : {
2 tgl@sss.pgh.pa.us 122 : 0 : OldMultiXactReader *state = pg_malloc_object(OldMultiXactReader);
7 heikki.linnakangas@i 123 : 0 : char dir[MAXPGPATH] = {0};
124 : :
125 : 0 : state->nextMXact = nextMulti;
126 : 0 : state->nextOffset = nextOffset;
127 : :
128 : 0 : pg_sprintf(dir, "%s/pg_multixact/offsets", pgdata);
129 : 0 : state->offset = AllocSlruRead(dir, false);
130 : :
131 : 0 : pg_sprintf(dir, "%s/pg_multixact/members", pgdata);
132 : 0 : state->members = AllocSlruRead(dir, false);
133 : :
134 : 0 : return state;
135 : : }
136 : :
137 : : /*
138 : : * This is a simplified version of the GetMultiXactIdMembers() server
139 : : * function:
140 : : *
141 : : * - Only return the updating member, if any. Upgrade only cares about the
142 : : * updaters. If there is no updating member, return somewhat arbitrarily
143 : : * the first locking-only member, because we don't have any way to represent
144 : : * "no members".
145 : : *
146 : : * - Because there's no concurrent activity, we don't need to worry about
147 : : * locking and some corner cases.
148 : : *
149 : : * - Don't bail out on invalid entries that could've been left behind after a
150 : : * server crash. Such multixids won't appear anywhere else on disk, so the
151 : : * server will never try to read them. During upgrade, however, we scan
152 : : * through all multixids in order, and will encounter such invalid but
153 : : * unreferenced multixids too. We try to distinguish between entries that
154 : : * are invalid because of missed disk writes, like entries with zeros in
155 : : * offsets or members, and entries that look corrupt in other ways that
156 : : * should not happen even on a server crash.
157 : : *
158 : : * Returns true on success, false if the multixact was invalid.
159 : : */
160 : : bool
161 : 0 : GetOldMultiXactIdSingleMember(OldMultiXactReader *state, MultiXactId multi,
162 : : MultiXactMember *member)
163 : : {
164 : : MultiXactId nextMXact,
165 : : nextOffset,
166 : : tmpMXact;
167 : : int64 pageno,
168 : : prev_pageno;
169 : : int entryno,
170 : : length;
171 : : char *buf;
172 : : MultiXactOffset32 *offptr,
173 : : offset;
174 : : MultiXactOffset32 nextMXOffset;
175 : 0 : TransactionId result_xid = InvalidTransactionId;
176 : 0 : MultiXactStatus result_status = 0;
177 : :
178 : 0 : nextMXact = state->nextMXact;
179 : 0 : nextOffset = state->nextOffset;
180 : :
181 : : /*
182 : : * Comment copied from GetMultiXactIdMembers in PostgreSQL v18
183 : : * multixact.c:
184 : : *
185 : : * Find out the offset at which we need to start reading MultiXactMembers
186 : : * and the number of members in the multixact. We determine the latter as
187 : : * the difference between this multixact's starting offset and the next
188 : : * one's. However, there are some corner cases to worry about:
189 : : *
190 : : * 1. This multixact may be the latest one created, in which case there is
191 : : * no next one to look at. The next multixact's offset should be set
192 : : * already, as we set it in RecordNewMultiXact(), but we used to not do
193 : : * that in older minor versions. To cope with that case, if this
194 : : * multixact is the latest one created, use the nextOffset value we read
195 : : * above as the endpoint.
196 : : *
197 : : * 2. Because GetNewMultiXactId skips over offset zero, to reserve zero
198 : : * for to mean "unset", there is an ambiguity near the point of offset
199 : : * wraparound. If we see next multixact's offset is one, is that our
200 : : * multixact's actual endpoint, or did it end at zero with a subsequent
201 : : * increment? We handle this using the knowledge that if the zero'th
202 : : * member slot wasn't filled, it'll contain zero, and zero isn't a valid
203 : : * transaction ID so it can't be a multixact member. Therefore, if we
204 : : * read a zero from the members array, just ignore it.
205 : : */
206 : :
207 : 0 : pageno = MultiXactIdToOffsetPage(multi);
208 : 0 : entryno = MultiXactIdToOffsetEntry(multi);
209 : :
210 : 0 : buf = SlruReadSwitchPage(state->offset, pageno);
211 : 0 : offptr = (MultiXactOffset32 *) buf;
212 : 0 : offptr += entryno;
213 : 0 : offset = *offptr;
214 : :
215 [ # # ]: 0 : if (offset == 0)
216 : : {
217 : : /* Invalid entry. These can be left behind on a server crash. */
218 : 0 : return false;
219 : : }
220 : :
221 : : /*
222 : : * Use the same increment rule as GetNewMultiXactId(), that is, don't
223 : : * handle wraparound explicitly until needed.
224 : : */
225 : 0 : tmpMXact = multi + 1;
226 : :
227 [ # # ]: 0 : if (nextMXact == tmpMXact)
228 : : {
229 : : /* Corner case 1: there is no next multixact */
230 : 0 : nextMXOffset = nextOffset;
231 : : }
232 : : else
233 : : {
234 : : /* handle wraparound if needed */
235 [ # # ]: 0 : if (tmpMXact < FirstMultiXactId)
236 : 0 : tmpMXact = FirstMultiXactId;
237 : :
238 : 0 : prev_pageno = pageno;
239 : :
240 : 0 : pageno = MultiXactIdToOffsetPage(tmpMXact);
241 : 0 : entryno = MultiXactIdToOffsetEntry(tmpMXact);
242 : :
243 [ # # ]: 0 : if (pageno != prev_pageno)
244 : 0 : buf = SlruReadSwitchPage(state->offset, pageno);
245 : :
246 : 0 : offptr = (MultiXactOffset32 *) buf;
247 : 0 : offptr += entryno;
248 : 0 : nextMXOffset = *offptr;
249 : : }
250 : :
251 [ # # ]: 0 : if (nextMXOffset == 0)
252 : : {
253 : : /* Invalid entry. These can be left behind on a server crash. */
254 : 0 : return false;
255 : : }
256 : 0 : length = nextMXOffset - offset;
257 : :
1 258 [ # # ]: 0 : if (length < 0)
259 : : {
260 : : /*
261 : : * This entry is corrupt. We should not see these even after a server
262 : : * crash.
263 : : */
264 : 0 : pg_fatal("multixact %u has an invalid length (%d)", multi, length);
265 : : }
266 [ # # ]: 0 : if (length == 0)
267 : : {
268 : : /*
269 : : * Invalid entry. The server never writes multixids with zero
270 : : * members, but it's not clear if a server crash or using pg_resetwal
271 : : * could leave them behind. Seems best to accept them.
272 : : */
273 : 0 : return false;
274 : : }
275 : :
276 : : /* read the members */
7 277 : 0 : prev_pageno = -1;
278 [ # # ]: 0 : for (int i = 0; i < length; i++, offset++)
279 : : {
280 : : TransactionId *xactptr;
281 : : uint32 *flagsptr;
282 : : int flagsoff;
283 : : int bshift;
284 : : int memberoff;
285 : : MultiXactStatus status;
286 : :
287 : 0 : pageno = MXOffsetToMemberPage(offset);
288 : 0 : memberoff = MXOffsetToMemberOffset(offset);
289 : :
290 [ # # ]: 0 : if (pageno != prev_pageno)
291 : : {
292 : 0 : buf = SlruReadSwitchPage(state->members, pageno);
293 : 0 : prev_pageno = pageno;
294 : : }
295 : :
296 : 0 : xactptr = (TransactionId *) (buf + memberoff);
297 [ # # ]: 0 : if (!TransactionIdIsValid(*xactptr))
298 : : {
299 : : /*
300 : : * Corner case 2: offset must have wrapped around to unused slot
301 : : * zero.
302 : : */
303 [ # # ]: 0 : if (offset == 0)
304 : 0 : continue;
305 : :
306 : : /*
307 : : * Otherwise this is an invalid entry that should not be
308 : : * referenced from anywhere in the heap. These can be left behind
309 : : * on a server crash. We could return 'false' here, but we prefer
310 : : * to continue reading the members and converting them the best we
311 : : * can, to preserve evidence in case this is corruption that
312 : : * should not have happened.
313 : : */
314 : : }
315 : :
316 : 0 : flagsoff = MXOffsetToFlagsOffset(offset);
317 : 0 : bshift = MXOffsetToFlagsBitShift(offset);
318 : 0 : flagsptr = (uint32 *) (buf + flagsoff);
319 : :
320 : 0 : status = (*flagsptr >> bshift) & MXACT_MEMBER_XACT_BITMASK;
321 : :
322 : : /*
323 : : * Remember the updating XID among the members, or first locking XID
324 : : * if no updating XID.
325 : : */
326 [ # # ]: 0 : if (ISUPDATE_from_mxstatus(status))
327 : : {
328 : : /* sanity check */
329 [ # # ]: 0 : if (ISUPDATE_from_mxstatus(result_status))
330 : : {
331 : : /*
332 : : * We don't expect to see more than one updating member, even
333 : : * if the server had crashed.
334 : : */
335 : 0 : pg_fatal("multixact %u has more than one updating member",
336 : : multi);
337 : : }
338 : 0 : result_xid = *xactptr;
339 : 0 : result_status = status;
340 : : }
341 [ # # ]: 0 : else if (!TransactionIdIsValid(result_xid))
342 : : {
343 : 0 : result_xid = *xactptr;
344 : 0 : result_status = status;
345 : : }
346 : : }
347 : :
348 : 0 : member->xid = result_xid;
349 : 0 : member->status = result_status;
350 : 0 : return true;
351 : : }
352 : :
353 : : /*
354 : : * Frees the malloced reader.
355 : : */
356 : : void
357 : 0 : FreeOldMultiXactReader(OldMultiXactReader *state)
358 : : {
359 : 0 : FreeSlruRead(state->offset);
360 : 0 : FreeSlruRead(state->members);
361 : :
362 : 0 : pfree(state);
363 : 0 : }
|