Age Owner Branch data TLA Line data Source code
1 : : /*-------------------------------------------------------------------------
2 : : *
3 : : * multixact.c
4 : : * PostgreSQL multi-transaction-log manager
5 : : *
6 : : * The pg_multixact manager is a pg_xact-like manager that stores an array of
7 : : * MultiXactMember for each MultiXactId. It is a fundamental part of the
8 : : * shared-row-lock implementation. Each MultiXactMember is comprised of a
9 : : * TransactionId and a set of flag bits. The name is a bit historical:
10 : : * originally, a MultiXactId consisted of more than one TransactionId (except
11 : : * in rare corner cases), hence "multi". Nowadays, however, it's perfectly
12 : : * legitimate to have MultiXactIds that only include a single Xid.
13 : : *
14 : : * The meaning of the flag bits is opaque to this module, but they are mostly
15 : : * used in heapam.c to identify lock modes that each of the member transactions
16 : : * is holding on any given tuple. This module just contains support to store
17 : : * and retrieve the arrays.
18 : : *
19 : : * We use two SLRU areas, one for storing the offsets at which the data
20 : : * starts for each MultiXactId in the other one. This trick allows us to
21 : : * store variable length arrays of TransactionIds. (We could alternatively
22 : : * use one area containing counts and TransactionIds, with valid MultiXactId
23 : : * values pointing at slots containing counts; but that way seems less robust
24 : : * since it would get completely confused if someone inquired about a bogus
25 : : * MultiXactId that pointed to an intermediate slot containing an XID.)
26 : : *
27 : : * XLOG interactions: this module generates a record whenever a new OFFSETs or
28 : : * MEMBERs page is initialized to zeroes, as well as an
29 : : * XLOG_MULTIXACT_CREATE_ID record whenever a new MultiXactId is defined.
30 : : * This module ignores the WAL rule "write xlog before data," because it
31 : : * suffices that actions recording a MultiXactId in a heap xmax do follow that
32 : : * rule. The only way for the MXID to be referenced from any data page is for
33 : : * heap_lock_tuple() or heap_update() to have put it there, and each generates
34 : : * an XLOG record that must follow ours. The normal LSN interlock between the
35 : : * data page and that XLOG record will ensure that our XLOG record reaches
36 : : * disk first. If the SLRU members/offsets data reaches disk sooner than the
37 : : * XLOG records, we do not care; after recovery, no xmax will refer to it. On
38 : : * the flip side, to ensure that all referenced entries _do_ reach disk, this
39 : : * module's XLOG records completely rebuild the data entered since the last
40 : : * checkpoint. We flush and sync all dirty OFFSETs and MEMBERs pages to disk
41 : : * before each checkpoint is considered complete.
42 : : *
43 : : * Like clog.c, and unlike subtrans.c, we have to preserve state across
44 : : * crashes and ensure that MXID and offset numbering increases monotonically
45 : : * across a crash. We do this in the same way as it's done for transaction
46 : : * IDs: the WAL record is guaranteed to contain evidence of every MXID we
47 : : * could need to worry about, and we just make sure that at the end of
48 : : * replay, the next-MXID and next-offset counters are at least as large as
49 : : * anything we saw during replay.
50 : : *
51 : : * We are able to remove segments no longer necessary by carefully tracking
52 : : * each table's used values: during vacuum, any multixact older than a certain
53 : : * value is removed; the cutoff value is stored in pg_class. The minimum value
54 : : * across all tables in each database is stored in pg_database, and the global
55 : : * minimum across all databases is part of pg_control and is kept in shared
56 : : * memory. Whenever that minimum is advanced, the SLRUs are truncated.
57 : : *
58 : : * When new multixactid values are to be created, care is taken that the
59 : : * counter does not fall within the wraparound horizon considering the global
60 : : * minimum value.
61 : : *
62 : : * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
63 : : * Portions Copyright (c) 1994, Regents of the University of California
64 : : *
65 : : * src/backend/access/transam/multixact.c
66 : : *
67 : : *-------------------------------------------------------------------------
68 : : */
69 : : #include "postgres.h"
70 : :
71 : : #include "access/multixact.h"
72 : : #include "access/slru.h"
73 : : #include "access/twophase.h"
74 : : #include "access/twophase_rmgr.h"
75 : : #include "access/xlog.h"
76 : : #include "access/xloginsert.h"
77 : : #include "access/xlogutils.h"
78 : : #include "miscadmin.h"
79 : : #include "pg_trace.h"
80 : : #include "pgstat.h"
81 : : #include "postmaster/autovacuum.h"
82 : : #include "storage/pmsignal.h"
83 : : #include "storage/proc.h"
84 : : #include "storage/procarray.h"
85 : : #include "utils/guc_hooks.h"
86 : : #include "utils/injection_point.h"
87 : : #include "utils/lsyscache.h"
88 : : #include "utils/memutils.h"
89 : :
90 : :
91 : : /*
92 : : * Defines for MultiXactOffset page sizes. A page is the same BLCKSZ as is
93 : : * used everywhere else in Postgres.
94 : : *
95 : : * Note: because MultiXactOffsets are 32 bits and wrap around at 0xFFFFFFFF,
96 : : * MultiXact page numbering also wraps around at
97 : : * 0xFFFFFFFF/MULTIXACT_OFFSETS_PER_PAGE, and segment numbering at
98 : : * 0xFFFFFFFF/MULTIXACT_OFFSETS_PER_PAGE/SLRU_PAGES_PER_SEGMENT. We need
99 : : * take no explicit notice of that fact in this module, except when comparing
100 : : * segment and page numbers in TruncateMultiXact (see
101 : : * MultiXactOffsetPagePrecedes).
102 : : */
103 : :
/* Each offsets-SLRU entry is one MultiXactOffset; this many fit in a page */
#define MULTIXACT_OFFSETS_PER_PAGE (BLCKSZ / sizeof(MultiXactOffset))
106 : :
107 : : static inline int64
447 heikki.linnakangas@i 108 :CBC 2355 : MultiXactIdToOffsetPage(MultiXactId multi)
109 : : {
110 : 2355 : return multi / MULTIXACT_OFFSETS_PER_PAGE;
111 : : }
112 : :
113 : : static inline int
114 : 1705 : MultiXactIdToOffsetEntry(MultiXactId multi)
115 : : {
116 : 1705 : return multi % MULTIXACT_OFFSETS_PER_PAGE;
117 : : }
118 : :
119 : : static inline int64
447 heikki.linnakangas@i 120 :UBC 0 : MultiXactIdToOffsetSegment(MultiXactId multi)
121 : : {
122 : 0 : return MultiXactIdToOffsetPage(multi) / SLRU_PAGES_PER_SEGMENT;
123 : : }
124 : :
125 : : /*
126 : : * The situation for members is a bit more complex: we store one byte of
127 : : * additional flag bits for each TransactionId. To do this without getting
128 : : * into alignment issues, we store four bytes of flags, and then the
129 : : * corresponding 4 Xids. Each such 5-word (20-byte) set we call a "group", and
130 : : * groups are stored as a whole in pages. Thus, with 8kB BLCKSZ, we keep 409 groups
131 : : * per page. This wastes 12 bytes per page, but that's OK -- simplicity (and
132 : : * performance) trumps space efficiency here.
133 : : *
134 : : * Note that the "offset" macros work with byte offset, not array indexes, so
135 : : * arithmetic must be done using "char *" pointers.
136 : : */
/* Each member gets eight flag bits, i.e. exactly one flag byte per xact */
#define MXACT_MEMBER_BITS_PER_XACT			8
#define MXACT_MEMBER_FLAGS_PER_BYTE			1
#define MXACT_MEMBER_XACT_BITMASK	((1 << MXACT_MEMBER_BITS_PER_XACT) - 1)

/* number of whole flag bytes at the start of every group */
#define MULTIXACT_FLAGBYTES_PER_GROUP		4
/* number of members whose flags (and Xids) live in one group */
#define MULTIXACT_MEMBERS_PER_MEMBERGROUP	\
	(MULTIXACT_FLAGBYTES_PER_GROUP * MXACT_MEMBER_FLAGS_PER_BYTE)
/* total bytes occupied by one group: the member Xids plus the flag bytes */
#define MULTIXACT_MEMBERGROUP_SIZE \
	(sizeof(TransactionId) * MULTIXACT_MEMBERS_PER_MEMBERGROUP + MULTIXACT_FLAGBYTES_PER_GROUP)
/* whole groups per page; any leftover bytes at the end of a page go unused */
#define MULTIXACT_MEMBERGROUPS_PER_PAGE (BLCKSZ / MULTIXACT_MEMBERGROUP_SIZE)
/* members per page, counting only members that belong to whole groups */
#define MULTIXACT_MEMBERS_PER_PAGE	\
	(MULTIXACT_MEMBERGROUPS_PER_PAGE * MULTIXACT_MEMBERS_PER_MEMBERGROUP)
152 : :
/*
 * Number of members stored in the last page of the last members segment.
 *
 * The highest member number, 0xFFFFFFFF, is not a multiple of the number of
 * members per page.  Consequently the final segment holds fewer pages than
 * a full segment does, and the final page of that segment holds fewer
 * members than a full page does: member 0xFFFFFFFF falls somewhere in the
 * middle of that page, and the space after it stays empty.
 */
#define MAX_MEMBERS_IN_LAST_MEMBERS_PAGE \
		((uint32) ((0xFFFFFFFF % MULTIXACT_MEMBERS_PER_PAGE) + 1))
165 : :
166 : : /* page in which a member is to be found */
167 : : static inline int64
447 heikki.linnakangas@i 168 :CBC 3024 : MXOffsetToMemberPage(MultiXactOffset offset)
169 : : {
170 : 3024 : return offset / MULTIXACT_MEMBERS_PER_PAGE;
171 : : }
172 : :
173 : : static inline int64
447 heikki.linnakangas@i 174 :UBC 0 : MXOffsetToMemberSegment(MultiXactOffset offset)
175 : : {
176 : 0 : return MXOffsetToMemberPage(offset) / SLRU_PAGES_PER_SEGMENT;
177 : : }
178 : :
179 : : /* Location (byte offset within page) of flag word for a given member */
180 : : static inline int
447 heikki.linnakangas@i 181 :CBC 3727 : MXOffsetToFlagsOffset(MultiXactOffset offset)
182 : : {
183 : 3727 : MultiXactOffset group = offset / MULTIXACT_MEMBERS_PER_MEMBERGROUP;
184 : 3727 : int grouponpg = group % MULTIXACT_MEMBERGROUPS_PER_PAGE;
185 : 3727 : int byteoff = grouponpg * MULTIXACT_MEMBERGROUP_SIZE;
186 : :
187 : 3727 : return byteoff;
188 : : }
189 : :
190 : : static inline int
191 : 1589 : MXOffsetToFlagsBitShift(MultiXactOffset offset)
192 : : {
193 : 1589 : int member_in_group = offset % MULTIXACT_MEMBERS_PER_MEMBERGROUP;
194 : 1589 : int bshift = member_in_group * MXACT_MEMBER_BITS_PER_XACT;
195 : :
196 : 1589 : return bshift;
197 : : }
198 : :
199 : : /* Location (byte offset within page) of TransactionId of given member */
200 : : static inline int
201 : 1306 : MXOffsetToMemberOffset(MultiXactOffset offset)
202 : : {
203 : 1306 : int member_in_group = offset % MULTIXACT_MEMBERS_PER_MEMBERGROUP;
204 : :
205 : 1306 : return MXOffsetToFlagsOffset(offset) +
206 : 1306 : MULTIXACT_FLAGBYTES_PER_GROUP +
207 : : member_in_group * sizeof(TransactionId);
208 : : }
209 : :
/*
 * Thresholds for multixact members wraparound: the "safe" one is half of
 * the offset space, the "danger" one is three quarters of it.
 */
#define MULTIXACT_MEMBER_SAFE_THRESHOLD		(MaxMultiXactOffset / 2)
#define MULTIXACT_MEMBER_DANGER_THRESHOLD	\
	(MaxMultiXactOffset - MaxMultiXactOffset / 4)
214 : :
215 : : static inline MultiXactId
447 heikki.linnakangas@i 216 :UBC 0 : PreviousMultiXactId(MultiXactId multi)
217 : : {
218 [ # # ]: 0 : return multi == FirstMultiXactId ? MaxMultiXactId : multi - 1;
219 : : }
220 : :
221 : : /*
222 : : * Links to shared-memory data structures for MultiXact control
223 : : */
224 : : static SlruCtlData MultiXactOffsetCtlData;
225 : : static SlruCtlData MultiXactMemberCtlData;
226 : :
227 : : #define MultiXactOffsetCtl (&MultiXactOffsetCtlData)
228 : : #define MultiXactMemberCtl (&MultiXactMemberCtlData)
229 : :
230 : : /*
231 : : * MultiXact state shared across all backends. All this state is protected
232 : : * by MultiXactGenLock. (We also use SLRU bank's lock of MultiXactOffset and
233 : : * MultiXactMember to guard accesses to the two sets of SLRU buffers. For
234 : : * concurrency's sake, we avoid holding more than one of these locks at a
235 : : * time.)
236 : : */
237 : : typedef struct MultiXactStateData
238 : : {
239 : : /* next-to-be-assigned MultiXactId */
240 : : MultiXactId nextMXact;
241 : :
242 : : /* next-to-be-assigned offset */
243 : : MultiXactOffset nextOffset;
244 : :
245 : : /* Have we completed multixact startup? */
246 : : bool finishedStartup;
247 : :
248 : : /*
249 : : * Oldest multixact that is still potentially referenced by a relation.
250 : : * Anything older than this should not be consulted. These values are
251 : : * updated by vacuum.
252 : : */
253 : : MultiXactId oldestMultiXactId;
254 : : Oid oldestMultiXactDB;
255 : :
256 : : /*
257 : : * Oldest multixact offset that is potentially referenced by a multixact
258 : : * referenced by a relation. We don't always know this value, so there's
259 : : * a flag here to indicate whether or not we currently do.
260 : : */
261 : : MultiXactOffset oldestOffset;
262 : : bool oldestOffsetKnown;
263 : :
264 : : /* support for anti-wraparound measures */
265 : : MultiXactId multiVacLimit;
266 : : MultiXactId multiWarnLimit;
267 : : MultiXactId multiStopLimit;
268 : : MultiXactId multiWrapLimit;
269 : :
270 : : /* support for members anti-wraparound measures */
271 : : MultiXactOffset offsetStopLimit; /* known if oldestOffsetKnown */
272 : :
273 : : /*
274 : : * This is used to sleep until a multixact offset is written when we want
275 : : * to create the next one.
276 : : */
277 : : ConditionVariable nextoff_cv;
278 : :
279 : : /*
280 : : * Per-backend data starts here. We have two arrays stored in the area
281 : : * immediately following the MultiXactStateData struct. Each is indexed by
282 : : * ProcNumber.
283 : : *
284 : : * In both arrays, there's a slot for all normal backends
285 : : * (0..MaxBackends-1) followed by a slot for max_prepared_xacts prepared
286 : : * transactions.
287 : : *
288 : : * OldestMemberMXactId[k] is the oldest MultiXactId each backend's current
289 : : * transaction(s) could possibly be a member of, or InvalidMultiXactId
290 : : * when the backend has no live transaction that could possibly be a
291 : : * member of a MultiXact. Each backend sets its entry to the current
292 : : * nextMXact counter just before first acquiring a shared lock in a given
293 : : * transaction, and clears it at transaction end. (This works because only
294 : : * during or after acquiring a shared lock could an XID possibly become a
295 : : * member of a MultiXact, and that MultiXact would have to be created
296 : : * during or after the lock acquisition.)
297 : : *
298 : : * OldestVisibleMXactId[k] is the oldest MultiXactId each backend's
299 : : * current transaction(s) think is potentially live, or InvalidMultiXactId
300 : : * when not in a transaction or not in a transaction that's paid any
301 : : * attention to MultiXacts yet. This is computed when first needed in a
302 : : * given transaction, and cleared at transaction end. We can compute it
303 : : * as the minimum of the valid OldestMemberMXactId[] entries at the time
304 : : * we compute it (using nextMXact if none are valid). Each backend is
305 : : * required not to attempt to access any SLRU data for MultiXactIds older
306 : : * than its own OldestVisibleMXactId[] setting; this is necessary because
307 : : * the relevant SLRU data can be concurrently truncated away.
308 : : *
309 : : * The oldest valid value among all of the OldestMemberMXactId[] and
310 : : * OldestVisibleMXactId[] entries is considered by vacuum as the earliest
311 : : * possible value still having any live member transaction -- OldestMxact.
312 : : * Any value older than that is typically removed from tuple headers, or
313 : : * "frozen" via being replaced with a new xmax. VACUUM can sometimes even
314 : : * remove an individual MultiXact xmax whose value is >= its OldestMxact
315 : : * cutoff, though typically only when no individual member XID is still
316 : : * running. See FreezeMultiXactId for full details.
317 : : *
318 : : * Whenever VACUUM advances relminmxid, then either its OldestMxact cutoff
319 : : * or the oldest extant Multi remaining in the table is used as the new
320 : : * pg_class.relminmxid value (whichever is earlier). The minimum of all
321 : : * relminmxid values in each database is stored in pg_database.datminmxid.
322 : : * In turn, the minimum of all of those values is stored in pg_control.
323 : : * This is used as the truncation point for pg_multixact when unneeded
324 : : * segments get removed by vac_truncate_clog() during vacuuming.
325 : : */
326 : : MultiXactId perBackendXactIds[FLEXIBLE_ARRAY_MEMBER];
327 : : } MultiXactStateData;
328 : :
329 : : /*
330 : : * Size of OldestMemberMXactId and OldestVisibleMXactId arrays.
331 : : */
332 : : #define MaxOldestSlot (MaxBackends + max_prepared_xacts)
333 : :
334 : : /* Pointers to the state data in shared memory */
335 : : static MultiXactStateData *MultiXactState;
336 : : static MultiXactId *OldestMemberMXactId;
337 : : static MultiXactId *OldestVisibleMXactId;
338 : :
339 : :
340 : : /*
341 : : * Definitions for the backend-local MultiXactId cache.
342 : : *
343 : : * We use this cache to store known MultiXacts, so we don't need to go to
344 : : * SLRU areas every time.
345 : : *
346 : : * The cache lasts for the duration of a single transaction, the rationale
347 : : * for this being that most entries will contain our own TransactionId and
348 : : * so they will be uninteresting by the time our next transaction starts.
349 : : * (XXX not clear that this is correct --- other members of the MultiXact
350 : : * could hang around longer than we did. However, it's not clear what a
351 : : * better policy for flushing old cache entries would be.) FIXME actually
352 : : * this is plain wrong now that multixacts may contain update Xids.
353 : : *
354 : : * We allocate the cache entries in a memory context that is deleted at
355 : : * transaction end, so we don't need to do retail freeing of entries.
356 : : */
357 : : typedef struct mXactCacheEnt
358 : : {
359 : : MultiXactId multi;
360 : : int nmembers;
361 : : dlist_node node;
362 : : MultiXactMember members[FLEXIBLE_ARRAY_MEMBER];
363 : : } mXactCacheEnt;
364 : :
365 : : #define MAX_CACHE_ENTRIES 256
366 : : static dclist_head MXactCache = DCLIST_STATIC_INIT(MXactCache);
367 : : static MemoryContext MXactContext = NULL;
368 : :
/*
 * Debug logging helpers, one per argument count.  They forward to elog()
 * when MULTIXACT_DEBUG is defined and expand to nothing otherwise.
 */
#ifdef MULTIXACT_DEBUG
#define debug_elog2(a,b) elog(a,b)
#define debug_elog3(a,b,c) elog(a,b,c)
#define debug_elog4(a,b,c,d) elog(a,b,c,d)
#define debug_elog5(a,b,c,d,e) elog(a,b,c,d,e)
#define debug_elog6(a,b,c,d,e,f) elog(a,b,c,d,e,f)
#else
#define debug_elog2(a,b)
#define debug_elog3(a,b,c)
#define debug_elog4(a,b,c,d)
#define debug_elog5(a,b,c,d,e)
#define debug_elog6(a,b,c,d,e,f)
#endif
382 : :
383 : : /* internal MultiXactId management */
384 : : static void MultiXactIdSetOldestVisible(void);
385 : : static void RecordNewMultiXact(MultiXactId multi, MultiXactOffset offset,
386 : : int nmembers, MultiXactMember *members);
387 : : static MultiXactId GetNewMultiXactId(int nmembers, MultiXactOffset *offset);
388 : :
389 : : /* MultiXact cache management */
390 : : static int mxactMemberComparator(const void *arg1, const void *arg2);
391 : : static MultiXactId mXactCacheGetBySet(int nmembers, MultiXactMember *members);
392 : : static int mXactCacheGetById(MultiXactId multi, MultiXactMember **members);
393 : : static void mXactCachePut(MultiXactId multi, int nmembers,
394 : : MultiXactMember *members);
395 : :
396 : : /* management of SLRU infrastructure */
397 : : static bool MultiXactOffsetPagePrecedes(int64 page1, int64 page2);
398 : : static bool MultiXactMemberPagePrecedes(int64 page1, int64 page2);
399 : : static bool MultiXactOffsetPrecedes(MultiXactOffset offset1,
400 : : MultiXactOffset offset2);
401 : : static void ExtendMultiXactOffset(MultiXactId multi);
402 : : static void ExtendMultiXactMember(MultiXactOffset offset, int nmembers);
403 : : static bool MultiXactOffsetWouldWrap(MultiXactOffset boundary,
404 : : MultiXactOffset start, uint32 distance);
405 : : static bool SetOffsetVacuumLimit(bool is_startup);
406 : : static bool find_multixact_start(MultiXactId multi, MultiXactOffset *result);
407 : : static void WriteMTruncateXlogRec(Oid oldestMultiDB,
408 : : MultiXactId startTruncOff,
409 : : MultiXactId endTruncOff,
410 : : MultiXactOffset startTruncMemb,
411 : : MultiXactOffset endTruncMemb);
412 : :
413 : :
414 : : /*
415 : : * MultiXactIdCreate
416 : : * Construct a MultiXactId representing two TransactionIds.
417 : : *
418 : : * The two XIDs must be different, or be requesting different statuses.
419 : : *
420 : : * NB - we don't worry about our local MultiXactId cache here, because that
421 : : * is handled by the lower-level routines.
422 : : */
423 : : MultiXactId
4609 alvherre@alvh.no-ip. 424 :CBC 1036 : MultiXactIdCreate(TransactionId xid1, MultiXactStatus status1,
425 : : TransactionId xid2, MultiXactStatus status2)
426 : : {
427 : : MultiXactId newMulti;
428 : : MultiXactMember members[2];
429 : :
1044 peter@eisentraut.org 430 [ - + ]: 1036 : Assert(TransactionIdIsValid(xid1));
431 [ - + ]: 1036 : Assert(TransactionIdIsValid(xid2));
432 : :
4609 alvherre@alvh.no-ip. 433 [ - + - - ]: 1036 : Assert(!TransactionIdEquals(xid1, xid2) || (status1 != status2));
434 : :
435 : : /* MultiXactIdSetOldestMember() must have been called already. */
552 heikki.linnakangas@i 436 [ - + ]: 1036 : Assert(MultiXactIdIsValid(OldestMemberMXactId[MyProcNumber]));
437 : :
438 : : /*
439 : : * Note: unlike MultiXactIdExpand, we don't bother to check that both XIDs
440 : : * are still running. In typical usage, xid2 will be our own XID and the
441 : : * caller just did a check on xid1, so it'd be wasted effort.
442 : : */
443 : :
4609 alvherre@alvh.no-ip. 444 : 1036 : members[0].xid = xid1;
445 : 1036 : members[0].status = status1;
446 : 1036 : members[1].xid = xid2;
447 : 1036 : members[1].status = status2;
448 : :
4282 449 : 1036 : newMulti = MultiXactIdCreateFromMembers(2, members);
450 : :
451 : : debug_elog3(DEBUG2, "Create: %s",
452 : : mxid_to_string(newMulti, 2, members));
453 : :
7431 tgl@sss.pgh.pa.us 454 : 1036 : return newMulti;
455 : : }
456 : :
457 : : /*
458 : : * MultiXactIdExpand
459 : : * Add a TransactionId to a pre-existing MultiXactId.
460 : : *
461 : : * If the TransactionId is already a member of the passed MultiXactId with the
462 : : * same status, just return it as-is.
463 : : *
464 : : * Note that we do NOT actually modify the membership of a pre-existing
465 : : * MultiXactId; instead we create a new one. This is necessary to avoid
466 : : * a race condition against code trying to wait for one MultiXactId to finish;
467 : : * see notes in heapam.c.
468 : : *
469 : : * NB - we don't worry about our local MultiXactId cache here, because that
470 : : * is handled by the lower-level routines.
471 : : *
472 : : * Note: It is critical that MultiXactIds that come from an old cluster (i.e.
473 : : * one upgraded by pg_upgrade from a cluster older than this feature) are not
474 : : * passed in.
475 : : */
476 : : MultiXactId
4609 alvherre@alvh.no-ip. 477 : 106 : MultiXactIdExpand(MultiXactId multi, TransactionId xid, MultiXactStatus status)
478 : : {
479 : : MultiXactId newMulti;
480 : : MultiXactMember *members;
481 : : MultiXactMember *newMembers;
482 : : int nmembers;
483 : : int i;
484 : : int j;
485 : :
1044 peter@eisentraut.org 486 [ - + ]: 106 : Assert(MultiXactIdIsValid(multi));
487 [ - + ]: 106 : Assert(TransactionIdIsValid(xid));
488 : :
489 : : /* MultiXactIdSetOldestMember() must have been called already. */
552 heikki.linnakangas@i 490 [ - + ]: 106 : Assert(MultiXactIdIsValid(OldestMemberMXactId[MyProcNumber]));
491 : :
492 : : debug_elog5(DEBUG2, "Expand: received multi %u, xid %u status %s",
493 : : multi, xid, mxstatus_to_string(status));
494 : :
495 : : /*
496 : : * Note: we don't allow for old multis here. The reason is that the only
497 : : * caller of this function does a check that the multixact is no longer
498 : : * running.
499 : : */
4057 alvherre@alvh.no-ip. 500 : 106 : nmembers = GetMultiXactIdMembers(multi, &members, false, false);
501 : :
7436 tgl@sss.pgh.pa.us 502 [ - + ]: 106 : if (nmembers < 0)
503 : : {
504 : : MultiXactMember member;
505 : :
506 : : /*
507 : : * The MultiXactId is obsolete. This can only happen if all the
508 : : * MultiXactId members stop running between the caller checking and
509 : : * passing it to us. It would be better to return that fact to the
510 : : * caller, but it would complicate the API and it's unlikely to happen
511 : : * too often, so just deal with it by creating a singleton MultiXact.
512 : : */
4609 alvherre@alvh.no-ip. 513 :UBC 0 : member.xid = xid;
514 : 0 : member.status = status;
4282 515 : 0 : newMulti = MultiXactIdCreateFromMembers(1, &member);
516 : :
517 : : debug_elog4(DEBUG2, "Expand: %u has no members, create singleton %u",
518 : : multi, newMulti);
7436 tgl@sss.pgh.pa.us 519 : 0 : return newMulti;
520 : : }
521 : :
522 : : /*
523 : : * If the TransactionId is already a member of the MultiXactId with the
524 : : * same status, just return the existing MultiXactId.
525 : : */
7436 tgl@sss.pgh.pa.us 526 [ + + ]:CBC 334 : for (i = 0; i < nmembers; i++)
527 : : {
4609 alvherre@alvh.no-ip. 528 [ + + ]: 228 : if (TransactionIdEquals(members[i].xid, xid) &&
529 [ - + ]: 54 : (members[i].status == status))
530 : : {
531 : : debug_elog4(DEBUG2, "Expand: %u is already a member of %u",
532 : : xid, multi);
7427 tgl@sss.pgh.pa.us 533 :UBC 0 : pfree(members);
7436 534 : 0 : return multi;
535 : : }
536 : : }
537 : :
538 : : /*
539 : : * Determine which of the members of the MultiXactId are still of
540 : : * interest. This is any running transaction, and also any transaction
541 : : * that grabbed something stronger than just a lock and was committed. (An
542 : : * update that aborted is of no interest here; and having more than one
543 : : * update Xid in a multixact would cause errors elsewhere.)
544 : : *
545 : : * Removing dead members is not just an optimization: freezing of tuples
546 : : * whose Xmax are multis depends on this behavior.
547 : : *
548 : : * Note we have the same race condition here as above: j could be 0 at the
549 : : * end of the loop.
550 : : */
551 : : newMembers = (MultiXactMember *)
4609 alvherre@alvh.no-ip. 552 :CBC 106 : palloc(sizeof(MultiXactMember) * (nmembers + 1));
553 : :
7436 tgl@sss.pgh.pa.us 554 [ + + ]: 334 : for (i = 0, j = 0; i < nmembers; i++)
555 : : {
4609 alvherre@alvh.no-ip. 556 [ + + ]: 228 : if (TransactionIdIsInProgress(members[i].xid) ||
4153 557 [ + + - + ]: 45 : (ISUPDATE_from_mxstatus(members[i].status) &&
4609 558 : 6 : TransactionIdDidCommit(members[i].xid)))
559 : : {
560 : 189 : newMembers[j].xid = members[i].xid;
561 : 189 : newMembers[j++].status = members[i].status;
562 : : }
563 : : }
564 : :
565 : 106 : newMembers[j].xid = xid;
566 : 106 : newMembers[j++].status = status;
4282 567 : 106 : newMulti = MultiXactIdCreateFromMembers(j, newMembers);
568 : :
7436 tgl@sss.pgh.pa.us 569 : 106 : pfree(members);
570 : 106 : pfree(newMembers);
571 : :
572 : : debug_elog3(DEBUG2, "Expand: returning new multi %u", newMulti);
573 : :
574 : 106 : return newMulti;
575 : : }
576 : :
577 : : /*
578 : : * MultiXactIdIsRunning
579 : : * Returns whether a MultiXactId is "running".
580 : : *
581 : : * We return true if at least one member of the given MultiXactId is still
582 : : * running. Note that a "false" result is certain not to change,
583 : : * because it is not legal to add members to an existing MultiXactId.
584 : : *
585 : : * Caller is expected to have verified that the multixact does not come from
586 : : * a pg_upgraded share-locked tuple.
587 : : */
588 : : bool
4057 alvherre@alvh.no-ip. 589 : 1011 : MultiXactIdIsRunning(MultiXactId multi, bool isLockOnly)
590 : : {
591 : : MultiXactMember *members;
592 : : int nmembers;
593 : : int i;
594 : :
595 : : debug_elog3(DEBUG2, "IsRunning %u?", multi);
596 : :
597 : : /*
598 : : * "false" here means we assume our callers have checked that the given
599 : : * multi cannot possibly come from a pg_upgraded database.
600 : : */
601 : 1011 : nmembers = GetMultiXactIdMembers(multi, &members, false, isLockOnly);
602 : :
3802 603 [ + + ]: 1011 : if (nmembers <= 0)
604 : : {
605 : : debug_elog2(DEBUG2, "IsRunning: no members");
7436 tgl@sss.pgh.pa.us 606 : 677 : return false;
607 : : }
608 : :
609 : : /*
610 : : * Checking for myself is cheap compared to looking in shared memory;
611 : : * return true if any live subtransaction of the current top-level
612 : : * transaction is a member.
613 : : *
614 : : * This is not needed for correctness, it's just a fast path.
615 : : */
616 [ + + ]: 795 : for (i = 0; i < nmembers; i++)
617 : : {
4609 alvherre@alvh.no-ip. 618 [ + + ]: 617 : if (TransactionIdIsCurrentTransactionId(members[i].xid))
619 : : {
620 : : debug_elog3(DEBUG2, "IsRunning: I (%d) am running!", i);
7427 tgl@sss.pgh.pa.us 621 : 156 : pfree(members);
7436 622 : 156 : return true;
623 : : }
624 : : }
625 : :
626 : : /*
627 : : * This could be made faster by having another entry point in procarray.c,
628 : : * walking the PGPROC array only once for all the members. But in most
629 : : * cases nmembers should be small enough that it doesn't much matter.
630 : : */
631 [ + + ]: 294 : for (i = 0; i < nmembers; i++)
632 : : {
4609 alvherre@alvh.no-ip. 633 [ + + ]: 251 : if (TransactionIdIsInProgress(members[i].xid))
634 : : {
635 : : debug_elog4(DEBUG2, "IsRunning: member %d (%u) is running",
636 : : i, members[i].xid);
7427 tgl@sss.pgh.pa.us 637 : 135 : pfree(members);
7436 638 : 135 : return true;
639 : : }
640 : : }
641 : :
642 : 43 : pfree(members);
643 : :
644 : : debug_elog3(DEBUG2, "IsRunning: %u is not running", multi);
645 : :
646 : 43 : return false;
647 : : }
648 : :
649 : : /*
650 : : * MultiXactIdSetOldestMember
651 : : * Save the oldest MultiXactId this transaction could be a member of.
652 : : *
653 : : * We set the OldestMemberMXactId for a given transaction the first time it's
654 : : * going to do some operation that might require a MultiXactId (tuple lock,
655 : : * update or delete). We need to do this even if we end up using a
656 : : * TransactionId instead of a MultiXactId, because there is a chance that
657 : : * another transaction would add our XID to a MultiXactId.
658 : : *
659 : : * The value to set is the next-to-be-assigned MultiXactId, so this is meant to
660 : : * be called just before doing any such possibly-MultiXactId-able operation.
661 : : */
662 : : void
663 : 1800548 : MultiXactIdSetOldestMember(void)
664 : : {
552 heikki.linnakangas@i 665 [ + + ]: 1800548 : if (!MultiXactIdIsValid(OldestMemberMXactId[MyProcNumber]))
666 : : {
667 : : MultiXactId nextMXact;
668 : :
669 : : /*
670 : : * You might think we don't need to acquire a lock here, since
671 : : * fetching and storing of TransactionIds is probably atomic, but in
672 : : * fact we do: suppose we pick up nextMXact and then lose the CPU for
673 : : * a long time. Someone else could advance nextMXact, and then
674 : : * another someone else could compute an OldestVisibleMXactId that
675 : : * would be after the value we are going to store when we get control
676 : : * back. Which would be wrong.
677 : : *
678 : : * Note that a shared lock is sufficient, because it's enough to stop
679 : : * someone from advancing nextMXact; and nobody else could be trying
680 : : * to write to our OldestMember entry, only reading (and we assume
681 : : * storing it is atomic.)
682 : : */
4265 alvherre@alvh.no-ip. 683 : 69112 : LWLockAcquire(MultiXactGenLock, LW_SHARED);
684 : :
685 : : /*
686 : : * We have to beware of the possibility that nextMXact is in the
687 : : * wrapped-around state. We don't fix the counter itself here, but we
688 : : * must be sure to store a valid value in our array entry.
689 : : */
7436 tgl@sss.pgh.pa.us 690 : 69112 : nextMXact = MultiXactState->nextMXact;
691 [ - + ]: 69112 : if (nextMXact < FirstMultiXactId)
7436 tgl@sss.pgh.pa.us 692 :UBC 0 : nextMXact = FirstMultiXactId;
693 : :
552 heikki.linnakangas@i 694 :CBC 69112 : OldestMemberMXactId[MyProcNumber] = nextMXact;
695 : :
7436 tgl@sss.pgh.pa.us 696 : 69112 : LWLockRelease(MultiXactGenLock);
697 : :
698 : : debug_elog4(DEBUG2, "MultiXact: setting OldestMember[%d] = %u",
699 : : MyProcNumber, nextMXact);
700 : : }
701 : 1800548 : }
702 : :
703 : : /*
704 : : * MultiXactIdSetOldestVisible
705 : : * Save the oldest MultiXactId this transaction considers possibly live.
706 : : *
707 : : * We set the OldestVisibleMXactId for a given transaction the first time
708 : : * it's going to inspect any MultiXactId. Once we have set this, we are
709 : : * guaranteed that SLRU data for MultiXactIds >= our own OldestVisibleMXactId
710 : : * won't be truncated away.
711 : : *
712 : : * The value to set is the oldest of nextMXact and all the valid per-backend
713 : : * OldestMemberMXactId[] entries. Because of the locking we do, we can be
714 : : * certain that no subsequent call to MultiXactIdSetOldestMember can set
715 : : * an OldestMemberMXactId[] entry older than what we compute here. Therefore
716 : : * there is no live transaction, now or later, that can be a member of any
717 : : * MultiXactId older than the OldestVisibleMXactId we compute here.
718 : : */
719 : : static void
720 : 919 : MultiXactIdSetOldestVisible(void)
721 : : {
552 heikki.linnakangas@i 722 [ + + ]: 919 : if (!MultiXactIdIsValid(OldestVisibleMXactId[MyProcNumber]))
723 : : {
724 : : MultiXactId oldestMXact;
725 : : int i;
726 : :
7436 tgl@sss.pgh.pa.us 727 : 250 : LWLockAcquire(MultiXactGenLock, LW_EXCLUSIVE);
728 : :
729 : : /*
730 : : * We have to beware of the possibility that nextMXact is in the
731 : : * wrapped-around state. We don't fix the counter itself here, but we
732 : : * must be sure to store a valid value in our array entry.
733 : : */
734 : 250 : oldestMXact = MultiXactState->nextMXact;
735 [ - + ]: 250 : if (oldestMXact < FirstMultiXactId)
7436 tgl@sss.pgh.pa.us 736 :UBC 0 : oldestMXact = FirstMultiXactId;
737 : :
552 heikki.linnakangas@i 738 [ + + ]:CBC 33682 : for (i = 0; i < MaxOldestSlot; i++)
739 : : {
7436 tgl@sss.pgh.pa.us 740 : 33432 : MultiXactId thisoldest = OldestMemberMXactId[i];
741 : :
742 [ + + + + ]: 33736 : if (MultiXactIdIsValid(thisoldest) &&
743 : 304 : MultiXactIdPrecedes(thisoldest, oldestMXact))
744 : 124 : oldestMXact = thisoldest;
745 : : }
746 : :
552 heikki.linnakangas@i 747 : 250 : OldestVisibleMXactId[MyProcNumber] = oldestMXact;
748 : :
7436 tgl@sss.pgh.pa.us 749 : 250 : LWLockRelease(MultiXactGenLock);
750 : :
751 : : debug_elog4(DEBUG2, "MultiXact: setting OldestVisible[%d] = %u",
752 : : MyProcNumber, oldestMXact);
753 : : }
754 : 919 : }
755 : :
756 : : /*
757 : : * ReadNextMultiXactId
758 : : * Return the next MultiXactId to be assigned, but don't allocate it
759 : : */
760 : : MultiXactId
4609 alvherre@alvh.no-ip. 761 : 29942 : ReadNextMultiXactId(void)
762 : : {
763 : : MultiXactId mxid;
764 : :
765 : : /* XXX we could presumably do this without a lock. */
766 : 29942 : LWLockAcquire(MultiXactGenLock, LW_SHARED);
767 : 29942 : mxid = MultiXactState->nextMXact;
768 : 29942 : LWLockRelease(MultiXactGenLock);
769 : :
770 [ - + ]: 29942 : if (mxid < FirstMultiXactId)
4609 alvherre@alvh.no-ip. 771 :UBC 0 : mxid = FirstMultiXactId;
772 : :
4609 alvherre@alvh.no-ip. 773 :CBC 29942 : return mxid;
774 : : }
775 : :
776 : : /*
777 : : * ReadMultiXactIdRange
778 : : * Get the range of IDs that may still be referenced by a relation.
779 : : */
780 : : void
1780 rhaas@postgresql.org 781 : 1478 : ReadMultiXactIdRange(MultiXactId *oldest, MultiXactId *next)
782 : : {
783 : 1478 : LWLockAcquire(MultiXactGenLock, LW_SHARED);
784 : 1478 : *oldest = MultiXactState->oldestMultiXactId;
785 : 1478 : *next = MultiXactState->nextMXact;
786 : 1478 : LWLockRelease(MultiXactGenLock);
787 : :
788 [ - + ]: 1478 : if (*oldest < FirstMultiXactId)
1780 rhaas@postgresql.org 789 :UBC 0 : *oldest = FirstMultiXactId;
1780 rhaas@postgresql.org 790 [ - + ]:CBC 1478 : if (*next < FirstMultiXactId)
1780 rhaas@postgresql.org 791 :UBC 0 : *next = FirstMultiXactId;
1780 rhaas@postgresql.org 792 :CBC 1478 : }
793 : :
794 : :
795 : : /*
796 : : * MultiXactIdCreateFromMembers
797 : : * Make a new MultiXactId from the specified set of members
798 : : *
799 : : * Make XLOG, SLRU and cache entries for a new MultiXactId, recording the
800 : : * given TransactionIds as members. Returns the newly created MultiXactId.
801 : : *
802 : : * NB: the passed members[] array will be sorted in-place.
803 : : */
804 : : MultiXactId
4282 alvherre@alvh.no-ip. 805 : 1143 : MultiXactIdCreateFromMembers(int nmembers, MultiXactMember *members)
806 : : {
807 : : MultiXactId multi;
808 : : MultiXactOffset offset;
809 : : xl_multixact_create xlrec;
810 : :
811 : : debug_elog3(DEBUG2, "Create: %s",
812 : : mxid_to_string(InvalidMultiXactId, nmembers, members));
813 : :
814 : : /*
815 : : * See if the same set of members already exists in our cache; if so, just
816 : : * re-use that MultiXactId. (Note: it might seem that looking in our
817 : : * cache is insufficient, and we ought to search disk to see if a
818 : : * duplicate definition already exists. But since we only ever create
819 : : * MultiXacts containing our own XID, in most cases any such MultiXacts
820 : : * were in fact created by us, and so will be in our cache. There are
821 : : * corner cases where someone else added us to a MultiXact without our
822 : : * knowledge, but it's not worth checking for.)
823 : : */
4609 824 : 1143 : multi = mXactCacheGetBySet(nmembers, members);
7436 tgl@sss.pgh.pa.us 825 [ + + ]: 1143 : if (MultiXactIdIsValid(multi))
826 : : {
827 : : debug_elog2(DEBUG2, "Create: in cache!");
828 : 849 : return multi;
829 : : }
830 : :
831 : : /* Verify that there is a single update Xid among the given members. */
832 : : {
833 : : int i;
4153 alvherre@alvh.no-ip. 834 : 294 : bool has_update = false;
835 : :
836 [ + + ]: 959 : for (i = 0; i < nmembers; i++)
837 : : {
838 [ + + ]: 665 : if (ISUPDATE_from_mxstatus(members[i].status))
839 : : {
840 [ - + ]: 130 : if (has_update)
1017 alvherre@alvh.no-ip. 841 [ # # ]:UBC 0 : elog(ERROR, "new multixact has more than one updating member: %s",
842 : : mxid_to_string(InvalidMultiXactId, nmembers, members));
4153 alvherre@alvh.no-ip. 843 :CBC 130 : has_update = true;
844 : : }
845 : : }
846 : : }
847 : :
848 : : /* Load the injection point before entering the critical section */
849 : : INJECTION_POINT_LOAD("multixact-create-from-members");
850 : :
851 : : /*
852 : : * Assign the MXID and offsets range to use, and make sure there is space
853 : : * in the OFFSETs and MEMBERs files. NB: this routine does
854 : : * START_CRIT_SECTION().
855 : : *
856 : : * Note: unlike MultiXactIdCreate and MultiXactIdExpand, we do not check
857 : : * that we've called MultiXactIdSetOldestMember here. This is because
858 : : * this routine is used in some places to create new MultiXactIds of which
859 : : * the current backend is not a member, notably during freezing of multis
860 : : * in vacuum. During vacuum, in particular, it would be unacceptable to
861 : : * keep our OldestMemberMXactId[] entry set, in case it runs for long.
862 : : */
4609 863 : 294 : multi = GetNewMultiXactId(nmembers, &offset);
864 : :
865 : : INJECTION_POINT_CACHED("multixact-create-from-members", NULL);
866 : :
867 : : /* Make an XLOG entry describing the new MXID. */
7395 tgl@sss.pgh.pa.us 868 : 294 : xlrec.mid = multi;
869 : 294 : xlrec.moff = offset;
4609 alvherre@alvh.no-ip. 870 : 294 : xlrec.nmembers = nmembers;
871 : :
872 : : /*
873 : : * XXX Note: there's a lot of padding space in MultiXactMember. We could
874 : : * find a more compact representation of this Xlog record -- perhaps all
875 : : * the status flags in one XLogRecData, then all the xids in another one?
876 : : * Not clear that it's worth the trouble though.
877 : : */
3943 heikki.linnakangas@i 878 : 294 : XLogBeginInsert();
207 peter@eisentraut.org 879 : 294 : XLogRegisterData(&xlrec, SizeOfMultiXactCreate);
880 : 294 : XLogRegisterData(members, nmembers * sizeof(MultiXactMember));
881 : :
3943 heikki.linnakangas@i 882 : 294 : (void) XLogInsert(RM_MULTIXACT_ID, XLOG_MULTIXACT_CREATE_ID);
883 : :
884 : : /* Now enter the information into the OFFSETs and MEMBERs logs */
4609 alvherre@alvh.no-ip. 885 : 294 : RecordNewMultiXact(multi, offset, nmembers, members);
886 : :
887 : : /* Done with critical section */
7253 tgl@sss.pgh.pa.us 888 [ - + ]: 294 : END_CRIT_SECTION();
889 : :
890 : : /* Store the new MultiXactId in the local cache, too */
4609 alvherre@alvh.no-ip. 891 : 294 : mXactCachePut(multi, nmembers, members);
892 : :
893 : : debug_elog2(DEBUG2, "Create: all done");
894 : :
7395 tgl@sss.pgh.pa.us 895 : 294 : return multi;
896 : : }
897 : :
898 : : /*
899 : : * RecordNewMultiXact
900 : : * Write info about a new multixact into the offsets and members files
901 : : *
902 : : * This is broken out of MultiXactIdCreateFromMembers so that xlog replay can
903 : : * use it.
904 : : */
905 : : static void
906 : 296 : RecordNewMultiXact(MultiXactId multi, MultiXactOffset offset,
907 : : int nmembers, MultiXactMember *members)
908 : : {
909 : : int64 pageno;
910 : : int64 prev_pageno;
911 : : int entryno;
912 : : int slotno;
913 : : MultiXactOffset *offptr;
914 : : int i;
915 : : LWLock *lock;
556 alvherre@alvh.no-ip. 916 : 296 : LWLock *prevlock = NULL;
917 : :
7436 tgl@sss.pgh.pa.us 918 : 296 : pageno = MultiXactIdToOffsetPage(multi);
919 : 296 : entryno = MultiXactIdToOffsetEntry(multi);
920 : :
556 alvherre@alvh.no-ip. 921 : 296 : lock = SimpleLruGetBankLock(MultiXactOffsetCtl, pageno);
922 : 296 : LWLockAcquire(lock, LW_EXCLUSIVE);
923 : :
924 : : /*
925 : : * Note: we pass the MultiXactId to SimpleLruReadPage as the "transaction"
926 : : * to complain about if there's any I/O error. This is kinda bogus, but
927 : : * since the errors will always give the full pathname, it should be clear
928 : : * enough that a MultiXactId is really involved. Perhaps someday we'll
929 : : * take the trouble to generalize the slru.c error reporting code.
930 : : */
6611 tgl@sss.pgh.pa.us 931 : 296 : slotno = SimpleLruReadPage(MultiXactOffsetCtl, pageno, true, multi);
7395 932 : 296 : offptr = (MultiXactOffset *) MultiXactOffsetCtl->shared->page_buffer[slotno];
7436 933 : 296 : offptr += entryno;
934 : :
935 : 296 : *offptr = offset;
936 : :
7245 937 : 296 : MultiXactOffsetCtl->shared->page_dirty[slotno] = true;
938 : :
939 : : /* Release MultiXactOffset SLRU lock. */
556 alvherre@alvh.no-ip. 940 : 296 : LWLockRelease(lock);
941 : :
942 : : /*
943 : : * If anybody was waiting to know the offset of this multixact ID we just
944 : : * wrote, they can read it now, so wake them up.
945 : : */
517 946 : 296 : ConditionVariableBroadcast(&MultiXactState->nextoff_cv);
947 : :
7436 tgl@sss.pgh.pa.us 948 : 296 : prev_pageno = -1;
949 : :
4609 alvherre@alvh.no-ip. 950 [ + + ]: 965 : for (i = 0; i < nmembers; i++, offset++)
951 : : {
952 : : TransactionId *memberptr;
953 : : uint32 *flagsptr;
954 : : uint32 flagsval;
955 : : int bshift;
956 : : int flagsoff;
957 : : int memberoff;
958 : :
959 [ - + ]: 669 : Assert(members[i].status <= MultiXactStatusUpdate);
960 : :
7436 tgl@sss.pgh.pa.us 961 : 669 : pageno = MXOffsetToMemberPage(offset);
4609 alvherre@alvh.no-ip. 962 : 669 : memberoff = MXOffsetToMemberOffset(offset);
963 : 669 : flagsoff = MXOffsetToFlagsOffset(offset);
964 : 669 : bshift = MXOffsetToFlagsBitShift(offset);
965 : :
7436 tgl@sss.pgh.pa.us 966 [ + + ]: 669 : if (pageno != prev_pageno)
967 : : {
968 : : /*
969 : : * The MultiXactMember SLRU page has changed. If the new page
970 : : * falls into a different SLRU bank, release the old bank's lock
971 : : * and acquire the lock on the new bank.
972 : : */
556 alvherre@alvh.no-ip. 973 : 296 : lock = SimpleLruGetBankLock(MultiXactMemberCtl, pageno);
974 [ + - ]: 296 : if (lock != prevlock)
975 : : {
976 [ - + ]: 296 : if (prevlock != NULL)
556 alvherre@alvh.no-ip. 977 :UBC 0 : LWLockRelease(prevlock);
978 : :
556 alvherre@alvh.no-ip. 979 :CBC 296 : LWLockAcquire(lock, LW_EXCLUSIVE);
980 : 296 : prevlock = lock;
981 : : }
6611 tgl@sss.pgh.pa.us 982 : 296 : slotno = SimpleLruReadPage(MultiXactMemberCtl, pageno, true, multi);
7436 983 : 296 : prev_pageno = pageno;
984 : : }
985 : :
986 : 669 : memberptr = (TransactionId *)
4609 alvherre@alvh.no-ip. 987 : 669 : (MultiXactMemberCtl->shared->page_buffer[slotno] + memberoff);
988 : :
989 : 669 : *memberptr = members[i].xid;
990 : :
991 : 669 : flagsptr = (uint32 *)
992 : 669 : (MultiXactMemberCtl->shared->page_buffer[slotno] + flagsoff);
993 : :
994 : 669 : flagsval = *flagsptr;
995 : 669 : flagsval &= ~(((1 << MXACT_MEMBER_BITS_PER_XACT) - 1) << bshift);
996 : 669 : flagsval |= (members[i].status << bshift);
997 : 669 : *flagsptr = flagsval;
998 : :
7245 tgl@sss.pgh.pa.us 999 : 669 : MultiXactMemberCtl->shared->page_dirty[slotno] = true;
1000 : : }
1001 : :
556 alvherre@alvh.no-ip. 1002 [ + - ]: 296 : if (prevlock != NULL)
1003 : 296 : LWLockRelease(prevlock);
7436 tgl@sss.pgh.pa.us 1004 : 296 : }
1005 : :
1006 : : /*
1007 : : * GetNewMultiXactId
1008 : : * Get the next MultiXactId.
1009 : : *
1010 : : * Also, reserve the needed amount of space in the "members" area. The
1011 : : * starting offset of the reserved space is returned in *offset.
1012 : : *
1013 : : * This may generate XLOG records for expansion of the offsets and/or members
1014 : : * files. Unfortunately, we have to do that while holding MultiXactGenLock
1015 : : * to avoid race conditions --- the XLOG record for zeroing a page must appear
1016 : : * before any backend can possibly try to store data in that page!
1017 : : *
1018 : : * We start a critical section before advancing the shared counters. The
1019 : : * caller must end the critical section after writing SLRU data.
1020 : : */
1021 : : static MultiXactId
4609 alvherre@alvh.no-ip. 1022 : 294 : GetNewMultiXactId(int nmembers, MultiXactOffset *offset)
1023 : : {
1024 : : MultiXactId result;
1025 : : MultiXactOffset nextOffset;
1026 : :
1027 : : debug_elog3(DEBUG2, "GetNew: for %d xids", nmembers);
1028 : :
1029 : : /* safety check, we should never get this far in a HS standby */
1030 [ - + ]: 294 : if (RecoveryInProgress())
4609 alvherre@alvh.no-ip. 1031 [ # # ]:UBC 0 : elog(ERROR, "cannot assign MultiXactIds during recovery");
1032 : :
7436 tgl@sss.pgh.pa.us 1033 :CBC 294 : LWLockAcquire(MultiXactGenLock, LW_EXCLUSIVE);
1034 : :
1035 : : /* Handle wraparound of the nextMXact counter */
1036 [ - + ]: 294 : if (MultiXactState->nextMXact < FirstMultiXactId)
7436 tgl@sss.pgh.pa.us 1037 :UBC 0 : MultiXactState->nextMXact = FirstMultiXactId;
1038 : :
1039 : : /* Assign the MXID */
7436 tgl@sss.pgh.pa.us 1040 :CBC 294 : result = MultiXactState->nextMXact;
1041 : :
1042 : : /*----------
1043 : : * Check to see if it's safe to assign another MultiXactId. This protects
1044 : : * against catastrophic data loss due to multixact wraparound. The basic
1045 : : * rules are:
1046 : : *
1047 : : * If we're past multiVacLimit or the safe threshold for member storage
1048 : : * space, or we don't know what the safe threshold for member storage is,
1049 : : * start trying to force autovacuum cycles.
1050 : : * If we're past multiWarnLimit, start issuing warnings.
1051 : : * If we're past multiStopLimit, refuse to create new MultiXactIds.
1052 : : *
1053 : : * Note these are pretty much the same protections as in GetNewTransactionId.
1054 : : *----------
1055 : : */
3730 andres@anarazel.de 1056 [ - + ]: 294 : if (!MultiXactIdPrecedes(result, MultiXactState->multiVacLimit))
1057 : : {
1058 : : /*
1059 : : * For safety's sake, we release MultiXactGenLock while sending
1060 : : * signals, warnings, etc. This is not so much because we care about
1061 : : * preserving concurrency in this situation, as to avoid any
1062 : : * possibility of deadlock while doing get_database_name(). First,
1063 : : * copy all the shared values we'll need in this path.
1064 : : */
4609 alvherre@alvh.no-ip. 1065 :UBC 0 : MultiXactId multiWarnLimit = MultiXactState->multiWarnLimit;
1066 : 0 : MultiXactId multiStopLimit = MultiXactState->multiStopLimit;
1067 : 0 : MultiXactId multiWrapLimit = MultiXactState->multiWrapLimit;
1068 : 0 : Oid oldest_datoid = MultiXactState->oldestMultiXactDB;
1069 : :
1070 : 0 : LWLockRelease(MultiXactGenLock);
1071 : :
1072 [ # # ]: 0 : if (IsUnderPostmaster &&
1073 [ # # ]: 0 : !MultiXactIdPrecedes(result, multiStopLimit))
1074 : : {
1075 : 0 : char *oldest_datname = get_database_name(oldest_datoid);
1076 : :
1077 : : /*
1078 : : * Immediately kick autovacuum into action as we're already in
1079 : : * ERROR territory.
1080 : : */
3730 andres@anarazel.de 1081 : 0 : SendPostmasterSignal(PMSIGNAL_START_AUTOVAC_LAUNCHER);
1082 : :
1083 : : /* complain even if that DB has disappeared */
4609 alvherre@alvh.no-ip. 1084 [ # # ]: 0 : if (oldest_datname)
1085 [ # # ]: 0 : ereport(ERROR,
1086 : : (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
1087 : : errmsg("database is not accepting commands that assign new MultiXactIds to avoid wraparound data loss in database \"%s\"",
1088 : : oldest_datname),
1089 : : errhint("Execute a database-wide VACUUM in that database.\n"
1090 : : "You might also need to commit or roll back old prepared transactions, or drop stale replication slots.")));
1091 : : else
1092 [ # # ]: 0 : ereport(ERROR,
1093 : : (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
1094 : : errmsg("database is not accepting commands that assign new MultiXactIds to avoid wraparound data loss in database with OID %u",
1095 : : oldest_datoid),
1096 : : errhint("Execute a database-wide VACUUM in that database.\n"
1097 : : "You might also need to commit or roll back old prepared transactions, or drop stale replication slots.")));
1098 : : }
1099 : :
1100 : : /*
1101 : : * To avoid swamping the postmaster with signals, we issue the autovac
1102 : : * request only once per 64K multis generated. This still gives
1103 : : * plenty of chances before we get into real trouble.
1104 : : */
3730 andres@anarazel.de 1105 [ # # # # ]: 0 : if (IsUnderPostmaster && (result % 65536) == 0)
1106 : 0 : SendPostmasterSignal(PMSIGNAL_START_AUTOVAC_LAUNCHER);
1107 : :
1108 [ # # ]: 0 : if (!MultiXactIdPrecedes(result, multiWarnLimit))
1109 : : {
4609 alvherre@alvh.no-ip. 1110 : 0 : char *oldest_datname = get_database_name(oldest_datoid);
1111 : :
1112 : : /* complain even if that DB has disappeared */
1113 [ # # ]: 0 : if (oldest_datname)
1114 [ # # ]: 0 : ereport(WARNING,
1115 : : (errmsg_plural("database \"%s\" must be vacuumed before %u more MultiXactId is used",
1116 : : "database \"%s\" must be vacuumed before %u more MultiXactIds are used",
1117 : : multiWrapLimit - result,
1118 : : oldest_datname,
1119 : : multiWrapLimit - result),
1120 : : errhint("Execute a database-wide VACUUM in that database.\n"
1121 : : "You might also need to commit or roll back old prepared transactions, or drop stale replication slots.")));
1122 : : else
1123 [ # # ]: 0 : ereport(WARNING,
1124 : : (errmsg_plural("database with OID %u must be vacuumed before %u more MultiXactId is used",
1125 : : "database with OID %u must be vacuumed before %u more MultiXactIds are used",
1126 : : multiWrapLimit - result,
1127 : : oldest_datoid,
1128 : : multiWrapLimit - result),
1129 : : errhint("Execute a database-wide VACUUM in that database.\n"
1130 : : "You might also need to commit or roll back old prepared transactions, or drop stale replication slots.")));
1131 : : }
1132 : :
1133 : : /* Re-acquire lock and start over */
1134 : 0 : LWLockAcquire(MultiXactGenLock, LW_EXCLUSIVE);
1135 : 0 : result = MultiXactState->nextMXact;
1136 [ # # ]: 0 : if (result < FirstMultiXactId)
1137 : 0 : result = FirstMultiXactId;
1138 : : }
1139 : :
1140 : : /* Make sure there is room for the MXID in the file. */
7395 tgl@sss.pgh.pa.us 1141 :CBC 294 : ExtendMultiXactOffset(result);
1142 : :
1143 : : /*
1144 : : * Reserve the members space, similarly to above. Also, be careful not to
1145 : : * return zero as the starting offset for any multixact. See
1146 : : * GetMultiXactIdMembers() for motivation.
1147 : : */
7253 1148 : 294 : nextOffset = MultiXactState->nextOffset;
1149 [ + + ]: 294 : if (nextOffset == 0)
1150 : : {
1151 : 10 : *offset = 1;
4609 alvherre@alvh.no-ip. 1152 : 10 : nmembers++; /* allocate member slot 0 too */
1153 : : }
1154 : : else
7253 tgl@sss.pgh.pa.us 1155 : 284 : *offset = nextOffset;
1156 : :
1157 : : /*----------
1158 : : * Protect against overrun of the members space as well, with the
1159 : : * following rules:
1160 : : *
1161 : : * If we're past offsetStopLimit, refuse to generate more multis.
1162 : : * If we're close to offsetStopLimit, emit a warning.
1163 : : *
1164 : : * Arbitrarily, we start emitting warnings when we're 20 segments or less
1165 : : * from offsetStopLimit.
1166 : : *
1167 : : * Note we haven't updated the shared state yet, so if we fail at this
1168 : : * point, the multixact ID we grabbed can still be used by the next guy.
1169 : : *
1170 : : * Note that there is no point in forcing autovacuum runs here: the
1171 : : * multixact freeze settings would have to be reduced for that to have any
1172 : : * effect.
1173 : : *----------
1174 : : */
1175 : : #define OFFSET_WARN_SEGMENTS 20
3633 andres@anarazel.de 1176 [ + - - + ]: 588 : if (MultiXactState->oldestOffsetKnown &&
3746 rhaas@postgresql.org 1177 : 294 : MultiXactOffsetWouldWrap(MultiXactState->offsetStopLimit, nextOffset,
1178 : : nmembers))
1179 : : {
1180 : : /* see comment in the corresponding offsets wraparound case */
3730 andres@anarazel.de 1181 :UBC 0 : SendPostmasterSignal(PMSIGNAL_START_AUTOVAC_LAUNCHER);
1182 : :
3784 alvherre@alvh.no-ip. 1183 [ # # ]: 0 : ereport(ERROR,
1184 : : (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
1185 : : errmsg("multixact \"members\" limit exceeded"),
1186 : : errdetail_plural("This command would create a multixact with %u members, but the remaining space is only enough for %u member.",
1187 : : "This command would create a multixact with %u members, but the remaining space is only enough for %u members.",
1188 : : MultiXactState->offsetStopLimit - nextOffset - 1,
1189 : : nmembers,
1190 : : MultiXactState->offsetStopLimit - nextOffset - 1),
1191 : : errhint("Execute a database-wide VACUUM in database with OID %u with reduced \"vacuum_multixact_freeze_min_age\" and \"vacuum_multixact_freeze_table_age\" settings.",
1192 : : MultiXactState->oldestMultiXactDB)));
1193 : : }
1194 : :
1195 : : /*
1196 : : * Check whether we should kick autovacuum into action, to prevent members
1197 : : * wraparound. NB we use a much larger window to trigger autovacuum than
1198 : : * just the warning limit. The warning is just a measure of last resort -
1199 : : * this is in line with GetNewTransactionId's behaviour.
1200 : : */
3730 andres@anarazel.de 1201 [ + - ]:CBC 294 : if (!MultiXactState->oldestOffsetKnown ||
1202 : 294 : (MultiXactState->nextOffset - MultiXactState->oldestOffset
1203 [ - + ]: 294 : > MULTIXACT_MEMBER_SAFE_THRESHOLD))
1204 : : {
1205 : : /*
1206 : : * To avoid swamping the postmaster with signals, we issue the autovac
1207 : : * request only when crossing a segment boundary. With default
1208 : : * compilation settings that's roughly after 50k members. This still
1209 : : * gives plenty of chances before we get into real trouble.
1210 : : */
3730 andres@anarazel.de 1211 :UBC 0 : if ((MXOffsetToMemberPage(nextOffset) / SLRU_PAGES_PER_SEGMENT) !=
1212 [ # # ]: 0 : (MXOffsetToMemberPage(nextOffset + nmembers) / SLRU_PAGES_PER_SEGMENT))
1213 : 0 : SendPostmasterSignal(PMSIGNAL_START_AUTOVAC_LAUNCHER);
1214 : : }
1215 : :
3633 andres@anarazel.de 1216 [ + - - + ]:CBC 588 : if (MultiXactState->oldestOffsetKnown &&
3730 1217 : 294 : MultiXactOffsetWouldWrap(MultiXactState->offsetStopLimit,
1218 : : nextOffset,
1219 : : nmembers + MULTIXACT_MEMBERS_PER_PAGE * SLRU_PAGES_PER_SEGMENT * OFFSET_WARN_SEGMENTS))
3784 alvherre@alvh.no-ip. 1220 [ # # ]:UBC 0 : ereport(WARNING,
1221 : : (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
1222 : : errmsg_plural("database with OID %u must be vacuumed before %d more multixact member is used",
1223 : : "database with OID %u must be vacuumed before %d more multixact members are used",
1224 : : MultiXactState->offsetStopLimit - nextOffset + nmembers,
1225 : : MultiXactState->oldestMultiXactDB,
1226 : : MultiXactState->offsetStopLimit - nextOffset + nmembers),
1227 : : errhint("Execute a database-wide VACUUM in that database with reduced \"vacuum_multixact_freeze_min_age\" and \"vacuum_multixact_freeze_table_age\" settings.")));
1228 : :
4609 alvherre@alvh.no-ip. 1229 :CBC 294 : ExtendMultiXactMember(nextOffset, nmembers);
1230 : :
1231 : : /*
1232 : : * Critical section from here until caller has written the data into the
1233 : : * just-reserved SLRU space; we don't want to error out with a partly
1234 : : * written MultiXact structure. (In particular, failing to write our
1235 : : * start offset after advancing nextMXact would effectively corrupt the
1236 : : * previous MultiXact.)
1237 : : */
7253 tgl@sss.pgh.pa.us 1238 : 294 : START_CRIT_SECTION();
1239 : :
1240 : : /*
1241 : : * Advance counters. As in GetNewTransactionId(), this must not happen
1242 : : * until after file extension has succeeded!
1243 : : *
1244 : : * We don't care about MultiXactId wraparound here; it will be handled by
1245 : : * the next iteration. But note that nextMXact may be InvalidMultiXactId
1246 : : * or the first value on a segment-beginning page after this routine
1247 : : * exits, so anyone else looking at the variable must be prepared to deal
1248 : : * with either case. Similarly, nextOffset may be zero, but we won't use
1249 : : * that as the actual start offset of the next multixact.
1250 : : */
1251 : 294 : (MultiXactState->nextMXact)++;
1252 : :
4609 alvherre@alvh.no-ip. 1253 : 294 : MultiXactState->nextOffset += nmembers;
1254 : :
7436 tgl@sss.pgh.pa.us 1255 : 294 : LWLockRelease(MultiXactGenLock);
1256 : :
1257 : : debug_elog4(DEBUG2, "GetNew: returning %u offset %u", result, *offset);
1258 : 294 : return result;
1259 : : }
1260 : :
1261 : : /*
1262 : : * GetMultiXactIdMembers
1263 : : * Return the set of MultiXactMembers that make up a MultiXactId
1264 : : *
1265 : : * Return value is the number of members found, or -1 if there are none,
1266 : : * and *members is set to a newly palloc'ed array of members. It's the
1267 : : * caller's responsibility to free it when done with it.
1268 : : *
1269 : : * from_pgupgrade must be passed as true if and only if the multixact
1270 : : * corresponds to a value from a tuple that was locked in a 9.2-or-older
1271 : : * installation and later pg_upgrade'd (that is, the infomask is
1272 : : * HEAP_LOCKED_UPGRADED). In this case, we know for certain that no members
1273 : : * can still be running, so we return -1 just like for an empty multixact
1274 : : * without any further checking. It would be wrong to try to resolve such a
1275 : : * multixact: either the multixact is within the current valid multixact
1276 : : * range, in which case the returned result would be bogus, or outside that
1277 : : * range, in which case an error would be raised.
1278 : : *
1279 : : * In all other cases, the passed multixact must be within the known valid
1280 : : * range, that is, greater than or equal to oldestMultiXactId, and less than
1281 : : * nextMXact. Otherwise, an error is raised.
1282 : : *
1283 : : * isLockOnly must be set to true if caller is certain that the given multi
1284 : : * is used only to lock tuples; can be false without loss of correctness,
1285 : : * but passing true means we can return quickly without checking for
1286 : : * old updates.
1287 : : */
1288 : : int
4609 alvherre@alvh.no-ip. 1289 : 3102 : GetMultiXactIdMembers(MultiXactId multi, MultiXactMember **members,
1290 : : bool from_pgupgrade, bool isLockOnly)
1291 : : {
1292 : : int64 pageno;
1293 : : int64 prev_pageno;
1294 : : int entryno;
1295 : : int slotno;
1296 : : MultiXactOffset *offptr;
1297 : : MultiXactOffset offset;
1298 : : int length;
1299 : : int truelength;
1300 : : MultiXactId oldestMXact;
1301 : : MultiXactId nextMXact;
1302 : : MultiXactId tmpMXact;
1303 : : MultiXactOffset nextOffset;
1304 : : MultiXactMember *ptr;
1305 : : LWLock *lock;
517 1306 : 3102 : bool slept = false;
1307 : :
1308 : : debug_elog3(DEBUG2, "GetMembers: asked for %u", multi);
1309 : :
3361 1310 [ + - - + ]: 3102 : if (!MultiXactIdIsValid(multi) || from_pgupgrade)
1311 : : {
1542 heikki.linnakangas@i 1312 :UBC 0 : *members = NULL;
4300 alvherre@alvh.no-ip. 1313 : 0 : return -1;
1314 : : }
1315 : :
1316 : : /* See if the MultiXactId is in the local cache */
4609 alvherre@alvh.no-ip. 1317 :CBC 3102 : length = mXactCacheGetById(multi, members);
7436 tgl@sss.pgh.pa.us 1318 [ + + ]: 3102 : if (length >= 0)
1319 : : {
1320 : : debug_elog3(DEBUG2, "GetMembers: found %s in the cache",
1321 : : mxid_to_string(multi, length, *members));
1322 : 2183 : return length;
1323 : : }
1324 : :
1325 : : /* Set our OldestVisibleMXactId[] entry if we didn't already */
1326 : 919 : MultiXactIdSetOldestVisible();
1327 : :
1328 : : /*
1329 : : * If we know the multi is used only for locking and not for updates, then
1330 : : * we can skip checking if the value is older than our oldest visible
1331 : : * multi. It cannot possibly still be running.
1332 : : */
1083 pg@bowt.ie 1333 [ + + + + ]: 1684 : if (isLockOnly &&
552 heikki.linnakangas@i 1334 : 765 : MultiXactIdPrecedes(multi, OldestVisibleMXactId[MyProcNumber]))
1335 : : {
1336 : : debug_elog2(DEBUG2, "GetMembers: a locker-only multi is too old");
4057 alvherre@alvh.no-ip. 1337 : 678 : *members = NULL;
1338 : 678 : return -1;
1339 : : }
1340 : :
1341 : : /*
1342 : : * We check known limits on MultiXact before resorting to the SLRU area.
1343 : : *
1344 : : * An ID older than MultiXactState->oldestMultiXactId cannot possibly be
1345 : : * useful; it has already been removed, or will be removed shortly, by
1346 : : * truncation. If one is passed, an error is raised.
1347 : : *
1348 : : * Also, an ID >= nextMXact shouldn't ever be seen here; if it is seen, it
1349 : : * implies undetected ID wraparound has occurred. This raises a hard
1350 : : * error.
1351 : : *
1352 : : * Shared lock is enough here since we aren't modifying any global state.
1353 : : * Acquire it just long enough to grab the current counter values. We may
1354 : : * need both nextMXact and nextOffset; see below.
1355 : : */
7436 tgl@sss.pgh.pa.us 1356 : 241 : LWLockAcquire(MultiXactGenLock, LW_SHARED);
1357 : :
4609 alvherre@alvh.no-ip. 1358 : 241 : oldestMXact = MultiXactState->oldestMultiXactId;
7253 tgl@sss.pgh.pa.us 1359 : 241 : nextMXact = MultiXactState->nextMXact;
1360 : 241 : nextOffset = MultiXactState->nextOffset;
1361 : :
1362 : 241 : LWLockRelease(MultiXactGenLock);
1363 : :
4609 alvherre@alvh.no-ip. 1364 [ - + ]: 241 : if (MultiXactIdPrecedes(multi, oldestMXact))
3361 alvherre@alvh.no-ip. 1365 [ # # ]:UBC 0 : ereport(ERROR,
1366 : : (errcode(ERRCODE_INTERNAL_ERROR),
1367 : : errmsg("MultiXactId %u does no longer exist -- apparent wraparound",
1368 : : multi)));
1369 : :
4609 alvherre@alvh.no-ip. 1370 [ + - ]:CBC 241 : if (!MultiXactIdPrecedes(multi, nextMXact))
4609 alvherre@alvh.no-ip. 1371 [ # # ]:UBC 0 : ereport(ERROR,
1372 : : (errcode(ERRCODE_INTERNAL_ERROR),
1373 : : errmsg("MultiXactId %u has not been created yet -- apparent wraparound",
1374 : : multi)));
1375 : :
1376 : : /*
1377 : : * Find out the offset at which we need to start reading MultiXactMembers
1378 : : * and the number of members in the multixact. We determine the latter as
1379 : : * the difference between this multixact's starting offset and the next
1380 : : * one's. However, there are some corner cases to worry about:
1381 : : *
1382 : : * 1. This multixact may be the latest one created, in which case there is
1383 : : * no next one to look at. In this case the nextOffset value we just
1384 : : * saved is the correct endpoint.
1385 : : *
1386 : : * 2. The next multixact may still be in process of being filled in: that
1387 : : * is, another process may have done GetNewMultiXactId but not yet written
1388 : : * the offset entry for that ID. In that scenario, it is guaranteed that
1389 : : * the offset entry for that multixact exists (because GetNewMultiXactId
1390 : : * won't release MultiXactGenLock until it does) but contains zero
1391 : : * (because we are careful to pre-zero offset pages). Because
1392 : : * GetNewMultiXactId will never return zero as the starting offset for a
1393 : : * multixact, when we read zero as the next multixact's offset, we know we
1394 : : * have this case. We handle this by sleeping on the condition variable
1395 : : * we have just for this; the process in charge will signal the CV as soon
1396 : : * as it has finished writing the multixact offset.
1397 : : *
1398 : : * 3. Because GetNewMultiXactId increments offset zero to offset one to
1399 : : * handle case #2, there is an ambiguity near the point of offset
1400 : : * wraparound. If we see next multixact's offset is one, is that our
1401 : : * multixact's actual endpoint, or did it end at zero with a subsequent
1402 : : * increment? We handle this using the knowledge that if the zero'th
1403 : : * member slot wasn't filled, it'll contain zero, and zero isn't a valid
1404 : : * transaction ID so it can't be a multixact member. Therefore, if we
1405 : : * read a zero from the members array, just ignore it.
1406 : : *
1407 : : * This is all pretty messy, but the mess occurs only in infrequent corner
1408 : : * cases, so it seems better than holding the MultiXactGenLock for a long
1409 : : * time on every multixact creation.
1410 : : */
7253 tgl@sss.pgh.pa.us 1411 :CBC 241 : retry:
7436 1412 : 241 : pageno = MultiXactIdToOffsetPage(multi);
1413 : 241 : entryno = MultiXactIdToOffsetEntry(multi);
1414 : :
1415 : : /* Acquire the bank lock for the page we need. */
556 alvherre@alvh.no-ip. 1416 : 241 : lock = SimpleLruGetBankLock(MultiXactOffsetCtl, pageno);
551 1417 : 241 : LWLockAcquire(lock, LW_EXCLUSIVE);
1418 : :
6611 tgl@sss.pgh.pa.us 1419 : 241 : slotno = SimpleLruReadPage(MultiXactOffsetCtl, pageno, true, multi);
7395 1420 : 241 : offptr = (MultiXactOffset *) MultiXactOffsetCtl->shared->page_buffer[slotno];
7436 1421 : 241 : offptr += entryno;
1422 : 241 : offset = *offptr;
1423 : :
7253 1424 [ - + ]: 241 : Assert(offset != 0);
1425 : :
1426 : : /*
1427 : : * Use the same increment rule as GetNewMultiXactId(), that is, don't
1428 : : * handle wraparound explicitly until needed.
1429 : : */
7436 1430 : 241 : tmpMXact = multi + 1;
1431 : :
1432 [ + + ]: 241 : if (nextMXact == tmpMXact)
1433 : : {
1434 : : /* Corner case 1: there is no next multixact */
1435 : 213 : length = nextOffset - offset;
1436 : : }
1437 : : else
1438 : : {
1439 : : MultiXactOffset nextMXOffset;
1440 : :
1441 : : /* handle wraparound if needed */
1442 [ - + ]: 28 : if (tmpMXact < FirstMultiXactId)
7436 tgl@sss.pgh.pa.us 1443 :UBC 0 : tmpMXact = FirstMultiXactId;
1444 : :
7436 tgl@sss.pgh.pa.us 1445 :CBC 28 : prev_pageno = pageno;
1446 : :
1447 : 28 : pageno = MultiXactIdToOffsetPage(tmpMXact);
1448 : 28 : entryno = MultiXactIdToOffsetEntry(tmpMXact);
1449 : :
1450 [ - + ]: 28 : if (pageno != prev_pageno)
1451 : : {
1452 : : LWLock *newlock;
1453 : :
1454 : : /*
1455 : : * Since we're going to access a different SLRU page, if this page
1456 : : * falls under a different bank, release the old bank's lock and
1457 : : * acquire the lock of the new bank.
1458 : : */
551 alvherre@alvh.no-ip. 1459 :UBC 0 : newlock = SimpleLruGetBankLock(MultiXactOffsetCtl, pageno);
1460 [ # # ]: 0 : if (newlock != lock)
1461 : : {
1462 : 0 : LWLockRelease(lock);
1463 : 0 : LWLockAcquire(newlock, LW_EXCLUSIVE);
1464 : 0 : lock = newlock;
1465 : : }
6611 tgl@sss.pgh.pa.us 1466 : 0 : slotno = SimpleLruReadPage(MultiXactOffsetCtl, pageno, true, tmpMXact);
1467 : : }
1468 : :
7395 tgl@sss.pgh.pa.us 1469 :CBC 28 : offptr = (MultiXactOffset *) MultiXactOffsetCtl->shared->page_buffer[slotno];
7436 1470 : 28 : offptr += entryno;
7253 1471 : 28 : nextMXOffset = *offptr;
1472 : :
1473 [ - + ]: 28 : if (nextMXOffset == 0)
1474 : : {
1475 : : /* Corner case 2: next multixact is still being filled in */
551 alvherre@alvh.no-ip. 1476 :UBC 0 : LWLockRelease(lock);
3949 1477 [ # # ]: 0 : CHECK_FOR_INTERRUPTS();
1478 : :
1479 : : INJECTION_POINT("multixact-get-members-cv-sleep", NULL);
1480 : :
517 1481 : 0 : ConditionVariableSleep(&MultiXactState->nextoff_cv,
1482 : : WAIT_EVENT_MULTIXACT_CREATION);
1483 : 0 : slept = true;
7253 tgl@sss.pgh.pa.us 1484 : 0 : goto retry;
1485 : : }
1486 : :
7253 tgl@sss.pgh.pa.us 1487 :CBC 28 : length = nextMXOffset - offset;
1488 : : }
1489 : :
551 alvherre@alvh.no-ip. 1490 : 241 : LWLockRelease(lock);
1491 : 241 : lock = NULL;
1492 : :
1493 : : /*
1494 : : * If we slept above, clean up state; it's no longer needed.
1495 : : */
517 1496 [ - + ]: 241 : if (slept)
517 alvherre@alvh.no-ip. 1497 :UBC 0 : ConditionVariableCancelSleep();
1498 : :
4609 alvherre@alvh.no-ip. 1499 :CBC 241 : ptr = (MultiXactMember *) palloc(length * sizeof(MultiXactMember));
1500 : :
7253 tgl@sss.pgh.pa.us 1501 : 241 : truelength = 0;
7436 1502 : 241 : prev_pageno = -1;
551 alvherre@alvh.no-ip. 1503 [ + + ]: 867 : for (int i = 0; i < length; i++, offset++)
1504 : : {
1505 : : TransactionId *xactptr;
1506 : : uint32 *flagsptr;
1507 : : int flagsoff;
1508 : : int bshift;
1509 : : int memberoff;
1510 : :
7436 tgl@sss.pgh.pa.us 1511 : 626 : pageno = MXOffsetToMemberPage(offset);
4609 alvherre@alvh.no-ip. 1512 : 626 : memberoff = MXOffsetToMemberOffset(offset);
1513 : :
7436 tgl@sss.pgh.pa.us 1514 [ + + ]: 626 : if (pageno != prev_pageno)
1515 : : {
1516 : : LWLock *newlock;
1517 : :
1518 : : /*
1519 : : * Since we're going to access a different SLRU page, if this page
1520 : : * falls under a different bank, release the old bank's lock and
1521 : : * acquire the lock of the new bank.
1522 : : */
551 alvherre@alvh.no-ip. 1523 : 241 : newlock = SimpleLruGetBankLock(MultiXactMemberCtl, pageno);
1524 [ + - ]: 241 : if (newlock != lock)
1525 : : {
1526 [ - + ]: 241 : if (lock)
551 alvherre@alvh.no-ip. 1527 :UBC 0 : LWLockRelease(lock);
551 alvherre@alvh.no-ip. 1528 :CBC 241 : LWLockAcquire(newlock, LW_EXCLUSIVE);
1529 : 241 : lock = newlock;
1530 : : }
1531 : :
6611 tgl@sss.pgh.pa.us 1532 : 241 : slotno = SimpleLruReadPage(MultiXactMemberCtl, pageno, true, multi);
7436 1533 : 241 : prev_pageno = pageno;
1534 : : }
1535 : :
1536 : 626 : xactptr = (TransactionId *)
4609 alvherre@alvh.no-ip. 1537 : 626 : (MultiXactMemberCtl->shared->page_buffer[slotno] + memberoff);
1538 : :
7253 tgl@sss.pgh.pa.us 1539 [ - + ]: 626 : if (!TransactionIdIsValid(*xactptr))
1540 : : {
1541 : : /* Corner case 3: we must be looking at unused slot zero */
7253 tgl@sss.pgh.pa.us 1542 [ # # ]:UBC 0 : Assert(offset == 0);
1543 : 0 : continue;
1544 : : }
1545 : :
4609 alvherre@alvh.no-ip. 1546 :CBC 626 : flagsoff = MXOffsetToFlagsOffset(offset);
1547 : 626 : bshift = MXOffsetToFlagsBitShift(offset);
1548 : 626 : flagsptr = (uint32 *) (MultiXactMemberCtl->shared->page_buffer[slotno] + flagsoff);
1549 : :
1550 : 626 : ptr[truelength].xid = *xactptr;
1551 : 626 : ptr[truelength].status = (*flagsptr >> bshift) & MXACT_MEMBER_XACT_BITMASK;
1552 : 626 : truelength++;
1553 : : }
1554 : :
551 1555 : 241 : LWLockRelease(lock);
1556 : :
1557 : : /* A multixid with zero members should not happen */
1542 heikki.linnakangas@i 1558 [ - + ]: 241 : Assert(truelength > 0);
1559 : :
1560 : : /*
1561 : : * Copy the result into the local cache.
1562 : : */
7253 tgl@sss.pgh.pa.us 1563 : 241 : mXactCachePut(multi, truelength, ptr);
1564 : :
1565 : : debug_elog3(DEBUG2, "GetMembers: no cache for %s",
1566 : : mxid_to_string(multi, truelength, ptr));
1542 heikki.linnakangas@i 1567 : 241 : *members = ptr;
7253 tgl@sss.pgh.pa.us 1568 : 241 : return truelength;
1569 : : }
1570 : :
1571 : : /*
1572 : : * mxactMemberComparator
1573 : : * qsort comparison function for MultiXactMember
1574 : : *
1575 : : * We can't use wraparound comparison for XIDs because that does not respect
1576 : : * the triangle inequality! Any old sort order will do.
1577 : : */
1578 : : static int
4609 alvherre@alvh.no-ip. 1579 : 1992 : mxactMemberComparator(const void *arg1, const void *arg2)
1580 : : {
1581 : 1992 : MultiXactMember member1 = *(const MultiXactMember *) arg1;
1582 : 1992 : MultiXactMember member2 = *(const MultiXactMember *) arg2;
1583 : :
1584 [ + + ]: 1992 : if (member1.xid > member2.xid)
1585 : 22 : return 1;
1586 [ + + ]: 1970 : if (member1.xid < member2.xid)
1587 : 1803 : return -1;
1588 [ - + ]: 167 : if (member1.status > member2.status)
4609 alvherre@alvh.no-ip. 1589 :UBC 0 : return 1;
4609 alvherre@alvh.no-ip. 1590 [ + - ]:CBC 167 : if (member1.status < member2.status)
1591 : 167 : return -1;
4609 alvherre@alvh.no-ip. 1592 :UBC 0 : return 0;
1593 : : }
1594 : :
1595 : : /*
1596 : : * mXactCacheGetBySet
1597 : : * returns a MultiXactId from the cache based on the set of
1598 : : * MultiXactMembers (XIDs plus status) that compose it, or
1599 : : * InvalidMultiXactId if none matches.
1600 : : *
1601 : : * This is helpful, for example, if two transactions want to lock a huge
1602 : : * table. By using the cache, the second will use the same MultiXactId
1603 : : * for the majority of tuples, thus keeping MultiXactId usage low (saving
1604 : : * both I/O and wraparound issues).
1605 : : *
1606 : : * NB: the passed members array will be sorted in-place.
1607 : : */
1608 : : static MultiXactId
4609 alvherre@alvh.no-ip. 1609 :CBC 1143 : mXactCacheGetBySet(int nmembers, MultiXactMember *members)
1610 : : {
1611 : : dlist_iter iter;
1612 : :
1613 : : debug_elog3(DEBUG2, "CacheGet: looking for %s",
1614 : : mxid_to_string(InvalidMultiXactId, nmembers, members));
1615 : :
1616 : : /* sort the array so comparison is easy */
1617 : 1143 : qsort(members, nmembers, sizeof(MultiXactMember), mxactMemberComparator);
1618 : :
1039 drowley@postgresql.o 1619 [ + - + + ]: 1395 : dclist_foreach(iter, &MXactCache)
1620 : : {
1621 : 1101 : mXactCacheEnt *entry = dclist_container(mXactCacheEnt, node,
1622 : : iter.cur);
1623 : :
4609 alvherre@alvh.no-ip. 1624 [ + + ]: 1101 : if (entry->nmembers != nmembers)
7436 tgl@sss.pgh.pa.us 1625 : 136 : continue;
1626 : :
1627 : : /*
1628 : : * We assume the cache entries are sorted, and that the unused bits in
1629 : : * "status" are zeroed.
1630 : : */
4609 alvherre@alvh.no-ip. 1631 [ + + ]: 965 : if (memcmp(members, entry->members, nmembers * sizeof(MultiXactMember)) == 0)
1632 : : {
1633 : : debug_elog3(DEBUG2, "CacheGet: found %u", entry->multi);
1039 drowley@postgresql.o 1634 : 849 : dclist_move_head(&MXactCache, iter.cur);
7436 tgl@sss.pgh.pa.us 1635 : 849 : return entry->multi;
1636 : : }
1637 : : }
1638 : :
1639 : : debug_elog2(DEBUG2, "CacheGet: not found :-(");
1640 : 294 : return InvalidMultiXactId;
1641 : : }
1642 : :
1643 : : /*
1644 : : * mXactCacheGetById
1645 : : * returns the composing MultiXactMember set from the cache for a
1646 : : * given MultiXactId, if present.
1647 : : *
1648 : : * If successful, *members is set to the address of a palloc'd copy of the
1649 : : * MultiXactMember set. Return value is number of members, or -1 if the
1649 : : * given MultiXactId is not in the cache.
1650 : : */
1651 : : static int
4609 alvherre@alvh.no-ip. 1652 : 3102 : mXactCacheGetById(MultiXactId multi, MultiXactMember **members)
1653 : : {
1654 : : dlist_iter iter;
1655 : :
1656 : : debug_elog3(DEBUG2, "CacheGet: looking for %u", multi);
1657 : :
1039 drowley@postgresql.o 1658 [ + - + + ]: 3474 : dclist_foreach(iter, &MXactCache)
1659 : : {
1660 : 2555 : mXactCacheEnt *entry = dclist_container(mXactCacheEnt, node,
1661 : : iter.cur);
1662 : :
7436 tgl@sss.pgh.pa.us 1663 [ + + ]: 2555 : if (entry->multi == multi)
1664 : : {
1665 : : MultiXactMember *ptr;
1666 : : Size size;
1667 : :
4609 alvherre@alvh.no-ip. 1668 : 2183 : size = sizeof(MultiXactMember) * entry->nmembers;
1669 : 2183 : ptr = (MultiXactMember *) palloc(size);
1670 : :
1671 : 2183 : memcpy(ptr, entry->members, size);
1672 : :
1673 : : debug_elog3(DEBUG2, "CacheGet: found %s",
1674 : : mxid_to_string(multi,
1675 : : entry->nmembers,
1676 : : entry->members));
1677 : :
1678 : : /*
1679 : : * Note we modify the list while not using a modifiable iterator.
1680 : : * This is acceptable only because we exit the iteration
1681 : : * immediately afterwards.
1682 : : */
1039 drowley@postgresql.o 1683 : 2183 : dclist_move_head(&MXactCache, iter.cur);
1684 : :
1542 heikki.linnakangas@i 1685 : 2183 : *members = ptr;
4609 alvherre@alvh.no-ip. 1686 : 2183 : return entry->nmembers;
1687 : : }
1688 : : }
1689 : :
1690 : : debug_elog2(DEBUG2, "CacheGet: not found");
7436 tgl@sss.pgh.pa.us 1691 : 919 : return -1;
1692 : : }
1693 : :
1694 : : /*
1695 : : * mXactCachePut
1696 : : * Add a new MultiXactId and its composing set into the local cache.
1697 : : */
1698 : : static void
4609 alvherre@alvh.no-ip. 1699 : 535 : mXactCachePut(MultiXactId multi, int nmembers, MultiXactMember *members)
1700 : : {
1701 : : mXactCacheEnt *entry;
1702 : :
1703 : : debug_elog3(DEBUG2, "CachePut: storing %s",
1704 : : mxid_to_string(multi, nmembers, members));
1705 : :
7436 tgl@sss.pgh.pa.us 1706 [ + + ]: 535 : if (MXactContext == NULL)
1707 : : {
1708 : : /* The cache only lives as long as the current transaction */
1709 : : debug_elog2(DEBUG2, "CachePut: initializing memory context");
1710 : 380 : MXactContext = AllocSetContextCreate(TopTransactionContext,
1711 : : "MultiXact cache context",
1712 : : ALLOCSET_SMALL_SIZES);
1713 : : }
1714 : :
1715 : : entry = (mXactCacheEnt *)
1716 : 535 : MemoryContextAlloc(MXactContext,
4609 alvherre@alvh.no-ip. 1717 : 535 : offsetof(mXactCacheEnt, members) +
1718 : : nmembers * sizeof(MultiXactMember));
1719 : :
7436 tgl@sss.pgh.pa.us 1720 : 535 : entry->multi = multi;
4609 alvherre@alvh.no-ip. 1721 : 535 : entry->nmembers = nmembers;
1722 : 535 : memcpy(entry->members, members, nmembers * sizeof(MultiXactMember));
1723 : :
1724 : : /* mXactCacheGetBySet assumes the entries are sorted, so sort them */
1725 : 535 : qsort(entry->members, nmembers, sizeof(MultiXactMember), mxactMemberComparator);
1726 : :
1039 drowley@postgresql.o 1727 : 535 : dclist_push_head(&MXactCache, &entry->node);
1728 [ - + ]: 535 : if (dclist_count(&MXactCache) > MAX_CACHE_ENTRIES)
1729 : : {
1730 : : dlist_node *node;
1731 : :
1039 drowley@postgresql.o 1732 :UBC 0 : node = dclist_tail_node(&MXactCache);
1733 : 0 : dclist_delete_from(&MXactCache, node);
1734 : :
1735 : 0 : entry = dclist_container(mXactCacheEnt, node, node);
1736 : : debug_elog3(DEBUG2, "CachePut: pruning cached multi %u",
1737 : : entry->multi);
1738 : :
4285 alvherre@alvh.no-ip. 1739 : 0 : pfree(entry);
1740 : : }
7436 tgl@sss.pgh.pa.us 1741 :CBC 535 : }
1742 : :
1743 : : char *
4609 alvherre@alvh.no-ip. 1744 :UBC 0 : mxstatus_to_string(MultiXactStatus status)
1745 : : {
1746 [ # # # # : 0 : switch (status)
# # # ]
1747 : : {
1748 : 0 : case MultiXactStatusForKeyShare:
1749 : 0 : return "keysh";
1750 : 0 : case MultiXactStatusForShare:
1751 : 0 : return "sh";
1752 : 0 : case MultiXactStatusForNoKeyUpdate:
1753 : 0 : return "fornokeyupd";
1754 : 0 : case MultiXactStatusForUpdate:
1755 : 0 : return "forupd";
1756 : 0 : case MultiXactStatusNoKeyUpdate:
1757 : 0 : return "nokeyupd";
1758 : 0 : case MultiXactStatusUpdate:
1759 : 0 : return "upd";
1760 : 0 : default:
1761 [ # # ]: 0 : elog(ERROR, "unrecognized multixact status %d", status);
1762 : : return "";
1763 : : }
1764 : : }
1765 : :
1766 : : char *
1767 : 0 : mxid_to_string(MultiXactId multi, int nmembers, MultiXactMember *members)
1768 : : {
1769 : : static char *str = NULL;
1770 : : StringInfoData buf;
1771 : : int i;
1772 : :
1773 [ # # ]: 0 : if (str != NULL)
1774 : 0 : pfree(str);
1775 : :
1776 : 0 : initStringInfo(&buf);
1777 : :
1778 : 0 : appendStringInfo(&buf, "%u %d[%u (%s)", multi, nmembers, members[0].xid,
1779 : : mxstatus_to_string(members[0].status));
1780 : :
1781 [ # # ]: 0 : for (i = 1; i < nmembers; i++)
1782 : 0 : appendStringInfo(&buf, ", %u (%s)", members[i].xid,
1783 : 0 : mxstatus_to_string(members[i].status));
1784 : :
1785 : 0 : appendStringInfoChar(&buf, ']');
1786 : 0 : str = MemoryContextStrdup(TopMemoryContext, buf.data);
1787 : 0 : pfree(buf.data);
7436 tgl@sss.pgh.pa.us 1788 : 0 : return str;
1789 : : }
1790 : :
1791 : : /*
1792 : : * AtEOXact_MultiXact
1793 : : * Handle transaction end for MultiXact
1794 : : *
1795 : : * This is called at top transaction commit or abort (we don't care which).
1796 : : */
1797 : : void
7436 tgl@sss.pgh.pa.us 1798 :CBC 316732 : AtEOXact_MultiXact(void)
1799 : : {
1800 : : /*
1801 : : * Reset our OldestMemberMXactId and OldestVisibleMXactId values, both of
1802 : : * which should only be valid while within a transaction.
1803 : : *
1804 : : * We assume that storing a MultiXactId is atomic and so we need not take
1805 : : * MultiXactGenLock to do this.
1806 : : */
552 heikki.linnakangas@i 1807 : 316732 : OldestMemberMXactId[MyProcNumber] = InvalidMultiXactId;
1808 : 316732 : OldestVisibleMXactId[MyProcNumber] = InvalidMultiXactId;
1809 : :
1810 : : /*
1811 : : * Discard the local MultiXactId cache. Since MXactContext was created as
1812 : : * a child of TopTransactionContext, we needn't delete it explicitly.
1813 : : */
7436 tgl@sss.pgh.pa.us 1814 : 316732 : MXactContext = NULL;
1039 drowley@postgresql.o 1815 : 316732 : dclist_init(&MXactCache);
7436 tgl@sss.pgh.pa.us 1816 : 316732 : }
1817 : :
1818 : : /*
1819 : : * AtPrepare_MultiXact
1820 : : * Save multixact state at 2PC transaction prepare
1821 : : *
1822 : : * In this phase, we only store our OldestMemberMXactId value in the two-phase
1823 : : * state file.
1824 : : */
1825 : : void
5766 heikki.linnakangas@i 1826 : 287 : AtPrepare_MultiXact(void)
1827 : : {
552 1828 : 287 : MultiXactId myOldestMember = OldestMemberMXactId[MyProcNumber];
1829 : :
5766 1830 [ + + ]: 287 : if (MultiXactIdIsValid(myOldestMember))
1831 : 49 : RegisterTwoPhaseRecord(TWOPHASE_RM_MULTIXACT_ID, 0,
1832 : : &myOldestMember, sizeof(MultiXactId));
1833 : 287 : }
1834 : :
1835 : : /*
1836 : : * PostPrepare_MultiXact
1837 : : * Clean up after successful PREPARE TRANSACTION
1838 : : */
1839 : : void
61 michael@paquier.xyz 1840 :GNC 287 : PostPrepare_MultiXact(FullTransactionId fxid)
1841 : : {
1842 : : MultiXactId myOldestMember;
1843 : :
1844 : : /*
1845 : : * Transfer our OldestMemberMXactId value to the slot reserved for the
1846 : : * prepared transaction.
1847 : : */
552 heikki.linnakangas@i 1848 :CBC 287 : myOldestMember = OldestMemberMXactId[MyProcNumber];
5766 1849 [ + + ]: 287 : if (MultiXactIdIsValid(myOldestMember))
1850 : : {
61 michael@paquier.xyz 1851 :GNC 49 : ProcNumber dummyProcNumber = TwoPhaseGetDummyProcNumber(fxid, false);
1852 : :
1853 : : /*
1854 : : * Even though storing MultiXactId is atomic, acquire lock to make
1855 : : * sure others see both changes, not just the reset of the slot of the
1856 : : * current backend. Using a volatile pointer might suffice, but this
1857 : : * isn't a hot spot.
1858 : : */
5766 heikki.linnakangas@i 1859 :CBC 49 : LWLockAcquire(MultiXactGenLock, LW_EXCLUSIVE);
1860 : :
552 1861 : 49 : OldestMemberMXactId[dummyProcNumber] = myOldestMember;
1862 : 49 : OldestMemberMXactId[MyProcNumber] = InvalidMultiXactId;
1863 : :
5766 1864 : 49 : LWLockRelease(MultiXactGenLock);
1865 : : }
1866 : :
1867 : : /*
1868 : : * We don't need to transfer OldestVisibleMXactId value, because the
1869 : : * transaction is not going to be looking at any more multixacts once it's
1870 : : * prepared.
1871 : : *
1872 : : * We assume that storing a MultiXactId is atomic and so we need not take
1873 : : * MultiXactGenLock to do this.
1874 : : */
552 1875 : 287 : OldestVisibleMXactId[MyProcNumber] = InvalidMultiXactId;
1876 : :
1877 : : /*
1878 : : * Discard the local MultiXactId cache like in AtEOXact_MultiXact.
1879 : : */
5766 1880 : 287 : MXactContext = NULL;
1039 drowley@postgresql.o 1881 : 287 : dclist_init(&MXactCache);
5766 heikki.linnakangas@i 1882 : 287 : }
1883 : :
1884 : : /*
1885 : : * multixact_twophase_recover
1886 : : * Recover the state of a prepared transaction at startup
1887 : : */
1888 : : void
61 michael@paquier.xyz 1889 :GNC 8 : multixact_twophase_recover(FullTransactionId fxid, uint16 info,
1890 : : void *recdata, uint32 len)
1891 : : {
1892 : 8 : ProcNumber dummyProcNumber = TwoPhaseGetDummyProcNumber(fxid, false);
1893 : : MultiXactId oldestMember;
1894 : :
1895 : : /*
1896 : : * Get the oldest member XID from the state file record, and set it in the
1897 : : * OldestMemberMXactId slot reserved for this prepared transaction.
1898 : : */
5766 heikki.linnakangas@i 1899 [ - + ]:CBC 8 : Assert(len == sizeof(MultiXactId));
5671 bruce@momjian.us 1900 : 8 : oldestMember = *((MultiXactId *) recdata);
1901 : :
552 heikki.linnakangas@i 1902 : 8 : OldestMemberMXactId[dummyProcNumber] = oldestMember;
5766 1903 : 8 : }
1904 : :
1905 : : /*
1906 : : * multixact_twophase_postcommit
1907 : : * Similar to AtEOXact_MultiXact but for COMMIT PREPARED
1908 : : */
1909 : : void
61 michael@paquier.xyz 1910 :GNC 53 : multixact_twophase_postcommit(FullTransactionId fxid, uint16 info,
1911 : : void *recdata, uint32 len)
1912 : : {
1913 : 53 : ProcNumber dummyProcNumber = TwoPhaseGetDummyProcNumber(fxid, true);
1914 : :
5766 heikki.linnakangas@i 1915 [ - + ]:CBC 53 : Assert(len == sizeof(MultiXactId));
1916 : :
552 1917 : 53 : OldestMemberMXactId[dummyProcNumber] = InvalidMultiXactId;
5766 1918 : 53 : }
1919 : :
1920 : : /*
1921 : : * multixact_twophase_postabort
1922 : : * This is actually just the same as the COMMIT case.
1923 : : */
1924 : : void
61 michael@paquier.xyz 1925 :GNC 22 : multixact_twophase_postabort(FullTransactionId fxid, uint16 info,
1926 : : void *recdata, uint32 len)
1927 : : {
1928 : 22 : multixact_twophase_postcommit(fxid, info, recdata, len);
5766 heikki.linnakangas@i 1929 :CBC 22 : }
1930 : :
1931 : : /*
1932 : : * Initialization of shared memory for MultiXact. We use two SLRU areas,
1933 : : * thus double memory. Also, reserve space for the shared MultiXactState
1934 : : * struct and the per-backend MultiXactId arrays (two of those, too).
1935 : : */
1936 : : Size
7436 tgl@sss.pgh.pa.us 1937 : 1909 : MultiXactShmemSize(void)
1938 : : {
1939 : : Size size;
1940 : :
1941 : : /* We need 2*MaxOldestSlot perBackendXactIds[] entries */
1942 : : #define SHARED_MULTIXACT_STATE_SIZE \
1943 : : add_size(offsetof(MultiXactStateData, perBackendXactIds), \
1944 : : mul_size(sizeof(MultiXactId) * 2, MaxOldestSlot))
1945 : :
7322 1946 : 1909 : size = SHARED_MULTIXACT_STATE_SIZE;
556 alvherre@alvh.no-ip. 1947 : 1909 : size = add_size(size, SimpleLruShmemSize(multixact_offset_buffers, 0));
1948 : 1909 : size = add_size(size, SimpleLruShmemSize(multixact_member_buffers, 0));
1949 : :
7322 tgl@sss.pgh.pa.us 1950 : 1909 : return size;
1951 : : }
1952 : :
1953 : : void
7436 1954 : 1029 : MultiXactShmemInit(void)
1955 : : {
1956 : : bool found;
1957 : :
1958 : : debug_elog2(DEBUG2, "Shared Memory Init for MultiXact");
1959 : :
1960 : 1029 : MultiXactOffsetCtl->PagePrecedes = MultiXactOffsetPagePrecedes;
1961 : 1029 : MultiXactMemberCtl->PagePrecedes = MultiXactMemberPagePrecedes;
1962 : :
7214 1963 : 1029 : SimpleLruInit(MultiXactOffsetCtl,
1964 : : "multixact_offset", multixact_offset_buffers, 0,
1965 : : "pg_multixact/offsets", LWTRANCHE_MULTIXACTOFFSET_BUFFER,
1966 : : LWTRANCHE_MULTIXACTOFFSET_SLRU,
1967 : : SYNC_HANDLER_MULTIXACT_OFFSET,
1968 : : false);
1694 noah@leadboat.com 1969 : 1029 : SlruPagePrecedesUnitTests(MultiXactOffsetCtl, MULTIXACT_OFFSETS_PER_PAGE);
7214 tgl@sss.pgh.pa.us 1970 : 1029 : SimpleLruInit(MultiXactMemberCtl,
1971 : : "multixact_member", multixact_member_buffers, 0,
1972 : : "pg_multixact/members", LWTRANCHE_MULTIXACTMEMBER_BUFFER,
1973 : : LWTRANCHE_MULTIXACTMEMBER_SLRU,
1974 : : SYNC_HANDLER_MULTIXACT_MEMBER,
1975 : : false);
1976 : : /* doesn't call SimpleLruTruncate() or meet criteria for unit tests */
1977 : :
1978 : : /* Initialize our shared state struct */
7436 1979 : 1029 : MultiXactState = ShmemInitStruct("Shared MultiXact State",
1980 : 1029 : SHARED_MULTIXACT_STATE_SIZE,
1981 : : &found);
1982 [ + - ]: 1029 : if (!IsUnderPostmaster)
1983 : : {
1984 [ - + ]: 1029 : Assert(!found);
1985 : :
1986 : : /* Make sure we zero out the per-backend state */
1987 [ + - - + : 1029 : MemSet(MultiXactState, 0, SHARED_MULTIXACT_STATE_SIZE);
- - - - -
- ]
517 alvherre@alvh.no-ip. 1988 : 1029 : ConditionVariableInit(&MultiXactState->nextoff_cv);
1989 : : }
1990 : : else
7436 tgl@sss.pgh.pa.us 1991 [ # # ]:UBC 0 : Assert(found);
1992 : :
1993 : : /*
1994 : : * Set up array pointers.
1995 : : */
7436 tgl@sss.pgh.pa.us 1996 :CBC 1029 : OldestMemberMXactId = MultiXactState->perBackendXactIds;
1243 rhaas@postgresql.org 1997 : 1029 : OldestVisibleMXactId = OldestMemberMXactId + MaxOldestSlot;
7436 tgl@sss.pgh.pa.us 1998 : 1029 : }
1999 : :
2000 : : /*
2001 : : * GUC check_hook for multixact_offset_buffers
2002 : : */
2003 : : bool
556 alvherre@alvh.no-ip. 2004 : 1067 : check_multixact_offset_buffers(int *newval, void **extra, GucSource source)
2005 : : {
2006 : 1067 : return check_slru_buffers("multixact_offset_buffers", newval);
2007 : : }
2008 : :
2009 : : /*
2010 : : * GUC check_hook for multixact_member_buffers
2011 : : */
2012 : : bool
2013 : 1067 : check_multixact_member_buffers(int *newval, void **extra, GucSource source)
2014 : : {
2015 : 1067 : return check_slru_buffers("multixact_member_buffers", newval);
2016 : : }
2017 : :
2018 : : /*
2019 : : * This function must be called ONCE on system install. It creates the initial
2020 : : * MultiXact segments. (The MultiXact directories are assumed to have been
2021 : : * created by initdb, and MultiXactShmemInit must have been called already.)
2022 : : */
2023 : : void
7436 tgl@sss.pgh.pa.us 2024 : 50 : BootStrapMultiXact(void)
2025 : : {
2026 : : /* Zero the initial pages and flush them to disk */
61 alvherre@kurilemu.de 2027 :GNC 50 : SimpleLruZeroAndWritePage(MultiXactOffsetCtl, 0);
2028 : 50 : SimpleLruZeroAndWritePage(MultiXactMemberCtl, 0);
7436 tgl@sss.pgh.pa.us 2029 :GIC 50 : }
2030 : :
2031 : : /*
2032 : : * MaybeExtendOffsetSlru
2033 : : * Extend the offsets SLRU area, if necessary
2034 : : *
2035 : : * After a binary upgrade from <= 9.2, the pg_multixact/offsets SLRU area might
2036 : : * contain files that are shorter than necessary; this would occur if the old
2037 : : * installation had used multixacts beyond the first page (files cannot be
2038 : : * copied, because the on-disk representation is different). pg_upgrade would
2039 : : * update pg_control to set the next offset value to be at that position, so
2040 : : * that tuples marked as locked by such MultiXacts would be seen as visible
2041 : : * without having to consult multixact. However, trying to create and use a
2042 : : * new MultiXactId would result in an error because the page on which the new
2043 : : * value would reside does not exist. This routine is in charge of creating
2044 : : * such pages.
2045 : : */
2046 : : static void
4401 alvherre@alvh.no-ip. 2047 :CBC 47 : MaybeExtendOffsetSlru(void)
2048 : : {
2049 : : int64 pageno;
2050 : : LWLock *lock;
2051 : :
2052 : 47 : pageno = MultiXactIdToOffsetPage(MultiXactState->nextMXact);
556 2053 : 47 : lock = SimpleLruGetBankLock(MultiXactOffsetCtl, pageno);
2054 : :
2055 : 47 : LWLockAcquire(lock, LW_EXCLUSIVE);
2056 : :
4401 2057 [ - + ]: 47 : if (!SimpleLruDoesPhysicalPageExist(MultiXactOffsetCtl, pageno))
2058 : : {
2059 : : int slotno;
2060 : :
2061 : : /*
2062 : : * Fortunately for us, SimpleLruWritePage is already prepared to deal
2063 : : * with creating a new segment file even if the page we're writing is
2064 : : * not the first in it, so this is enough.
2065 : : */
61 alvherre@kurilemu.de 2066 :UNC 0 : slotno = SimpleLruZeroPage(MultiXactOffsetCtl, pageno);
4401 alvherre@alvh.no-ip. 2067 :UBC 0 : SimpleLruWritePage(MultiXactOffsetCtl, slotno);
2068 : : }
2069 : :
556 alvherre@alvh.no-ip. 2070 :CBC 47 : LWLockRelease(lock);
4401 2071 : 47 : }
2072 : :
2073 : : /*
2074 : : * This must be called ONCE during postmaster or standalone-backend startup.
2075 : : *
2076 : : * StartupXLOG has already established nextMXact/nextOffset by calling
2077 : : * MultiXactSetNextMXact and/or MultiXactAdvanceNextMXact, and the oldestMulti
2078 : : * info from pg_control and/or MultiXactAdvanceOldest, but we haven't yet
2079 : : * replayed WAL.
2080 : : */
2081 : : void
7436 tgl@sss.pgh.pa.us 2082 : 887 : StartupMultiXact(void)
2083 : : {
4299 alvherre@alvh.no-ip. 2084 : 887 : MultiXactId multi = MultiXactState->nextMXact;
2085 : 887 : MultiXactOffset offset = MultiXactState->nextOffset;
2086 : : int64 pageno;
2087 : :
2088 : : /*
2089 : : * Initialize offset's idea of the latest page number.
2090 : : */
2091 : 887 : pageno = MultiXactIdToOffsetPage(multi);
578 2092 : 887 : pg_atomic_write_u64(&MultiXactOffsetCtl->shared->latest_page_number,
2093 : : pageno);
2094 : :
2095 : : /*
2096 : : * Initialize member's idea of the latest page number.
2097 : : */
4299 2098 : 887 : pageno = MXOffsetToMemberPage(offset);
578 2099 : 887 : pg_atomic_write_u64(&MultiXactMemberCtl->shared->latest_page_number,
2100 : : pageno);
4299 2101 : 887 : }
2102 : :
2103 : : /*
2104 : : * This must be called ONCE at the end of startup/recovery.
2105 : : */
2106 : : void
2107 : 832 : TrimMultiXact(void)
2108 : : {
2109 : : MultiXactId nextMXact;
2110 : : MultiXactOffset offset;
2111 : : MultiXactId oldestMXact;
2112 : : Oid oldestMXactDB;
2113 : : int64 pageno;
2114 : : int entryno;
2115 : : int flagsoff;
2116 : :
3633 andres@anarazel.de 2117 : 832 : LWLockAcquire(MultiXactGenLock, LW_SHARED);
2118 : 832 : nextMXact = MultiXactState->nextMXact;
2119 : 832 : offset = MultiXactState->nextOffset;
2120 : 832 : oldestMXact = MultiXactState->oldestMultiXactId;
2121 : 832 : oldestMXactDB = MultiXactState->oldestMultiXactDB;
2122 : 832 : LWLockRelease(MultiXactGenLock);
2123 : :
2124 : : /* Clean up offsets state */
2125 : :
2126 : : /*
2127 : : * (Re-)Initialize our idea of the latest page number for offsets.
2128 : : */
2129 : 832 : pageno = MultiXactIdToOffsetPage(nextMXact);
578 alvherre@alvh.no-ip. 2130 : 832 : pg_atomic_write_u64(&MultiXactOffsetCtl->shared->latest_page_number,
2131 : : pageno);
2132 : :
2133 : : /*
2134 : : * Zero out the remainder of the current offsets page. See notes in
2135 : : * TrimCLOG() for background. Unlike CLOG, some WAL record covers every
2136 : : * pg_multixact SLRU mutation. Since, also unlike CLOG, we ignore the WAL
2137 : : * rule "write xlog before data," nextMXact successors may carry obsolete,
2138 : : * nonzero offset values. Zero those so case 2 of GetMultiXactIdMembers()
2139 : : * operates normally.
2140 : : */
3633 andres@anarazel.de 2141 : 832 : entryno = MultiXactIdToOffsetEntry(nextMXact);
7395 tgl@sss.pgh.pa.us 2142 [ + + ]: 832 : if (entryno != 0)
2143 : : {
2144 : : int slotno;
2145 : : MultiXactOffset *offptr;
556 alvherre@alvh.no-ip. 2146 : 831 : LWLock *lock = SimpleLruGetBankLock(MultiXactOffsetCtl, pageno);
2147 : :
2148 : 831 : LWLockAcquire(lock, LW_EXCLUSIVE);
3633 andres@anarazel.de 2149 : 831 : slotno = SimpleLruReadPage(MultiXactOffsetCtl, pageno, true, nextMXact);
7395 tgl@sss.pgh.pa.us 2150 : 831 : offptr = (MultiXactOffset *) MultiXactOffsetCtl->shared->page_buffer[slotno];
2151 : 831 : offptr += entryno;
2152 : :
2153 [ - + - - : 831 : MemSet(offptr, 0, BLCKSZ - (entryno * sizeof(MultiXactOffset)));
- - - - -
- ]
2154 : :
7245 2155 : 831 : MultiXactOffsetCtl->shared->page_dirty[slotno] = true;
556 alvherre@alvh.no-ip. 2156 : 831 : LWLockRelease(lock);
2157 : : }
2158 : :
2159 : : /*
2160 : : * And the same for members.
2161 : : *
2162 : : * (Re-)Initialize our idea of the latest page number for members.
2163 : : */
7395 tgl@sss.pgh.pa.us 2164 : 832 : pageno = MXOffsetToMemberPage(offset);
578 alvherre@alvh.no-ip. 2165 : 832 : pg_atomic_write_u64(&MultiXactMemberCtl->shared->latest_page_number,
2166 : : pageno);
2167 : :
2168 : : /*
2169 : : * Zero out the remainder of the current members page. See notes in
2170 : : * TrimCLOG() for motivation.
2171 : : */
4609 2172 : 832 : flagsoff = MXOffsetToFlagsOffset(offset);
2173 [ + + ]: 832 : if (flagsoff != 0)
2174 : : {
2175 : : int slotno;
2176 : : TransactionId *xidptr;
2177 : : int memberoff;
556 2178 : 11 : LWLock *lock = SimpleLruGetBankLock(MultiXactMemberCtl, pageno);
2179 : :
2180 : 11 : LWLockAcquire(lock, LW_EXCLUSIVE);
4609 2181 : 11 : memberoff = MXOffsetToMemberOffset(offset);
6611 tgl@sss.pgh.pa.us 2182 : 11 : slotno = SimpleLruReadPage(MultiXactMemberCtl, pageno, true, offset);
4609 alvherre@alvh.no-ip. 2183 : 11 : xidptr = (TransactionId *)
2184 : 11 : (MultiXactMemberCtl->shared->page_buffer[slotno] + memberoff);
2185 : :
2186 [ + + + - : 11 : MemSet(xidptr, 0, BLCKSZ - memberoff);
+ - - + -
- ]
2187 : :
2188 : : /*
2189 : : * Note: we don't need to zero out the flag bits in the remaining
2190 : : * members of the current group, because they are always reset before
2191 : : * writing.
2192 : : */
2193 : :
7245 tgl@sss.pgh.pa.us 2194 : 11 : MultiXactMemberCtl->shared->page_dirty[slotno] = true;
556 alvherre@alvh.no-ip. 2195 : 11 : LWLockRelease(lock);
2196 : : }
2197 : :
2198 : : /* signal that we're officially up */
3633 andres@anarazel.de 2199 : 832 : LWLockAcquire(MultiXactGenLock, LW_EXCLUSIVE);
2200 : 832 : MultiXactState->finishedStartup = true;
3746 rhaas@postgresql.org 2201 : 832 : LWLockRelease(MultiXactGenLock);
2202 : :
2203 : : /* Now compute how far away the next members wraparound is. */
3098 tgl@sss.pgh.pa.us 2204 : 832 : SetMultiXactIdLimit(oldestMXact, oldestMXactDB, true);
7436 2205 : 832 : }
2206 : :
2207 : : /*
2208 : : * Get the MultiXact data to save in a checkpoint record
2209 : : */
2210 : : void
7395 2211 : 1479 : MultiXactGetCheckptMulti(bool is_shutdown,
2212 : : MultiXactId *nextMulti,
2213 : : MultiXactOffset *nextMultiOffset,
2214 : : MultiXactId *oldestMulti,
2215 : : Oid *oldestMultiDB)
2216 : : {
7436 2217 : 1479 : LWLockAcquire(MultiXactGenLock, LW_SHARED);
7395 2218 : 1479 : *nextMulti = MultiXactState->nextMXact;
2219 : 1479 : *nextMultiOffset = MultiXactState->nextOffset;
4609 alvherre@alvh.no-ip. 2220 : 1479 : *oldestMulti = MultiXactState->oldestMultiXactId;
2221 : 1479 : *oldestMultiDB = MultiXactState->oldestMultiXactDB;
7436 tgl@sss.pgh.pa.us 2222 : 1479 : LWLockRelease(MultiXactGenLock);
2223 : :
2224 : : debug_elog6(DEBUG2,
2225 : : "MultiXact: checkpoint is nextMulti %u, nextOffset %u, oldestMulti %u in DB %u",
2226 : : *nextMulti, *nextMultiOffset, *oldestMulti, *oldestMultiDB);
2227 : 1479 : }
2228 : :
2229 : : /*
2230 : : * Perform a checkpoint --- either during shutdown, or on-the-fly
2231 : : */
2232 : : void
2233 : 1677 : CheckPointMultiXact(void)
2234 : : {
2235 : : TRACE_POSTGRESQL_MULTIXACT_CHECKPOINT_START(true);
2236 : :
2237 : : /*
2238 : : * Write dirty MultiXact pages to disk. This may result in sync requests
2239 : : * queued for later handling by ProcessSyncRequests(), as part of the
2240 : : * checkpoint.
2241 : : */
1807 tmunro@postgresql.or 2242 : 1677 : SimpleLruWriteAll(MultiXactOffsetCtl, true);
2243 : 1677 : SimpleLruWriteAll(MultiXactMemberCtl, true);
2244 : :
2245 : : TRACE_POSTGRESQL_MULTIXACT_CHECKPOINT_DONE(true);
7436 tgl@sss.pgh.pa.us 2246 : 1677 : }
2247 : :
2248 : : /*
2249 : : * Set the next-to-be-assigned MultiXactId and offset
2250 : : *
2251 : : * This is used when we can determine the correct next ID/offset exactly
2252 : : * from a checkpoint record. Although this is only called during bootstrap
2253 : : * and XLog replay, we take the lock in case any hot-standby backends are
2254 : : * examining the values.
2255 : : */
2256 : : void
7395 2257 : 967 : MultiXactSetNextMXact(MultiXactId nextMulti,
2258 : : MultiXactOffset nextMultiOffset)
2259 : : {
2260 : : debug_elog4(DEBUG2, "MultiXact: setting next multi to %u offset %u",
2261 : : nextMulti, nextMultiOffset);
4961 2262 : 967 : LWLockAcquire(MultiXactGenLock, LW_EXCLUSIVE);
7436 2263 : 967 : MultiXactState->nextMXact = nextMulti;
7395 2264 : 967 : MultiXactState->nextOffset = nextMultiOffset;
4961 2265 : 967 : LWLockRelease(MultiXactGenLock);
2266 : :
2267 : : /*
2268 : : * During a binary upgrade, make sure that the offsets SLRU is large
2269 : : * enough to contain the next value that would be created.
2270 : : *
2271 : : * We need to do this pretty early during the first startup in binary
2272 : : * upgrade mode: before StartupMultiXact() in fact, because this routine
2273 : : * is called even before that by StartupXLOG(). And we can't do it
2274 : : * earlier than at this point, because during that first call of this
2275 : : * routine we determine the MultiXactState->nextMXact value that
2276 : : * MaybeExtendOffsetSlru needs.
2277 : : */
3782 alvherre@alvh.no-ip. 2278 [ + + ]: 967 : if (IsBinaryUpgrade)
2279 : 47 : MaybeExtendOffsetSlru();
7436 tgl@sss.pgh.pa.us 2280 : 967 : }
2281 : :
2282 : : /*
2283 : : * Determine the last safe MultiXactId to allocate given the currently oldest
2284 : : * datminmxid (ie, the oldest MultiXactId that might exist in any database
2285 : : * of our cluster), and the OID of the (or a) database with that value.
2286 : : *
2287 : : * is_startup is true when we are just starting the cluster, false when we
2288 : : * are updating state in a running cluster. This only affects log messages.
2289 : : */
2290 : : void
3098 2291 : 1867 : SetMultiXactIdLimit(MultiXactId oldest_datminmxid, Oid oldest_datoid,
2292 : : bool is_startup)
2293 : : {
2294 : : MultiXactId multiVacLimit;
2295 : : MultiXactId multiWarnLimit;
2296 : : MultiXactId multiStopLimit;
2297 : : MultiXactId multiWrapLimit;
2298 : : MultiXactId curMulti;
2299 : : bool needs_offset_vacuum;
2300 : :
4609 alvherre@alvh.no-ip. 2301 [ - + ]: 1867 : Assert(MultiXactIdIsValid(oldest_datminmxid));
2302 : :
2303 : : /*
2304 : : * We pretend that a wrap will happen halfway through the multixact ID
2305 : : * space, but that's not really true, because multixacts wrap differently
2306 : : * from transaction IDs. Note that, separately from any concern about
2307 : : * multixact IDs wrapping, we must ensure that multixact members do not
2308 : : * wrap. Limits for that are set in SetOffsetVacuumLimit, not here.
2309 : : */
2310 : 1867 : multiWrapLimit = oldest_datminmxid + (MaxMultiXactId >> 1);
2311 [ - + ]: 1867 : if (multiWrapLimit < FirstMultiXactId)
4609 alvherre@alvh.no-ip. 2312 :UBC 0 : multiWrapLimit += FirstMultiXactId;
2313 : :
2314 : : /*
2315 : : * We'll refuse to continue assigning MultiXactIds once we get within 3M
2316 : : * multi of data loss. See SetTransactionIdLimit.
2317 : : */
1862 noah@leadboat.com 2318 :CBC 1867 : multiStopLimit = multiWrapLimit - 3000000;
4609 alvherre@alvh.no-ip. 2319 [ - + ]: 1867 : if (multiStopLimit < FirstMultiXactId)
4609 alvherre@alvh.no-ip. 2320 :UBC 0 : multiStopLimit -= FirstMultiXactId;
2321 : :
2322 : : /*
2323 : : * We'll start complaining loudly when we get within 40M multis of data
2324 : : * loss. This is kind of arbitrary, but if you let your gas gauge get
2325 : : * down to 2% of full, would you be looking for the next gas station? We
2326 : : * need to be fairly liberal about this number because there are lots of
2327 : : * scenarios where most transactions are done by automatic clients that
2328 : : * won't pay attention to warnings. (No, we're not gonna make this
2329 : : * configurable. If you know enough to configure it, you know enough to
2330 : : * not get in this kind of trouble in the first place.)
2331 : : */
1862 noah@leadboat.com 2332 :CBC 1867 : multiWarnLimit = multiWrapLimit - 40000000;
4609 alvherre@alvh.no-ip. 2333 [ - + ]: 1867 : if (multiWarnLimit < FirstMultiXactId)
4609 alvherre@alvh.no-ip. 2334 :UBC 0 : multiWarnLimit -= FirstMultiXactId;
2335 : :
2336 : : /*
2337 : : * We'll start trying to force autovacuums when oldest_datminmxid gets to
2338 : : * be more than autovacuum_multixact_freeze_max_age mxids old.
2339 : : *
2340 : : * Note: autovacuum_multixact_freeze_max_age is a PGC_POSTMASTER parameter
2341 : : * so that we don't have to worry about dealing with on-the-fly changes in
2342 : : * its value. See SetTransactionIdLimit.
2343 : : */
4223 alvherre@alvh.no-ip. 2344 :CBC 1867 : multiVacLimit = oldest_datminmxid + autovacuum_multixact_freeze_max_age;
4609 2345 [ - + ]: 1867 : if (multiVacLimit < FirstMultiXactId)
4609 alvherre@alvh.no-ip. 2346 :UBC 0 : multiVacLimit += FirstMultiXactId;
2347 : :
2348 : : /* Grab lock for just long enough to set the new limit values */
4609 alvherre@alvh.no-ip. 2349 :CBC 1867 : LWLockAcquire(MultiXactGenLock, LW_EXCLUSIVE);
2350 : 1867 : MultiXactState->oldestMultiXactId = oldest_datminmxid;
2351 : 1867 : MultiXactState->oldestMultiXactDB = oldest_datoid;
2352 : 1867 : MultiXactState->multiVacLimit = multiVacLimit;
2353 : 1867 : MultiXactState->multiWarnLimit = multiWarnLimit;
2354 : 1867 : MultiXactState->multiStopLimit = multiStopLimit;
2355 : 1867 : MultiXactState->multiWrapLimit = multiWrapLimit;
2356 : 1867 : curMulti = MultiXactState->nextMXact;
2357 : 1867 : LWLockRelease(MultiXactGenLock);
2358 : :
2359 : : /* Log the info */
2360 [ + + ]: 1867 : ereport(DEBUG1,
2361 : : (errmsg_internal("MultiXactId wrap limit is %u, limited by database with OID %u",
2362 : : multiWrapLimit, oldest_datoid)));
2363 : :
2364 : : /*
2365 : : * Computing the actual limits is only possible once the data directory is
2366 : : * in a consistent state. There's no need to compute the limits while
2367 : : * still replaying WAL - no decisions about new multis are made even
2368 : : * though multixact creations might be replayed. So we'll only do further
2369 : : * checks after TrimMultiXact() has been called.
2370 : : */
3633 andres@anarazel.de 2371 [ + + ]: 1867 : if (!MultiXactState->finishedStartup)
2372 : 937 : return;
2373 : :
2374 [ - + ]: 930 : Assert(!InRecovery);
2375 : :
2376 : : /* Set limits for offset vacuum. */
3098 tgl@sss.pgh.pa.us 2377 : 930 : needs_offset_vacuum = SetOffsetVacuumLimit(is_startup);
2378 : :
2379 : : /*
2380 : : * If past the autovacuum force point, immediately signal an autovac
2381 : : * request. The reason for this is that autovac only processes one
2382 : : * database per invocation. Once it's finished cleaning up the oldest
2383 : : * database, it'll call here, and we'll signal the postmaster to start
2384 : : * another iteration immediately if there are still any old databases.
2385 : : */
3771 rhaas@postgresql.org 2386 [ + - - + ]: 930 : if ((MultiXactIdPrecedes(multiVacLimit, curMulti) ||
3633 andres@anarazel.de 2387 [ # # ]:UBC 0 : needs_offset_vacuum) && IsUnderPostmaster)
4609 alvherre@alvh.no-ip. 2388 : 0 : SendPostmasterSignal(PMSIGNAL_START_AUTOVAC_LAUNCHER);
2389 : :
2390 : : /* Give an immediate warning if past the wrap warn point */
3633 andres@anarazel.de 2391 [ - + ]:CBC 930 : if (MultiXactIdPrecedes(multiWarnLimit, curMulti))
2392 : : {
2393 : : char *oldest_datname;
2394 : :
2395 : : /*
2396 : : * We can be called when not inside a transaction, for example during
2397 : : * StartupXLOG(). In such a case we cannot do database access, so we
2398 : : * must just report the oldest DB's OID.
2399 : : *
2400 : : * Note: it's also possible that get_database_name fails and returns
2401 : : * NULL, for example because the database just got dropped. We'll
2402 : : * still warn, even though the warning might now be unnecessary.
2403 : : */
4609 alvherre@alvh.no-ip. 2404 [ # # ]:UBC 0 : if (IsTransactionState())
2405 : 0 : oldest_datname = get_database_name(oldest_datoid);
2406 : : else
2407 : 0 : oldest_datname = NULL;
2408 : :
2409 [ # # ]: 0 : if (oldest_datname)
2410 [ # # ]: 0 : ereport(WARNING,
2411 : : (errmsg_plural("database \"%s\" must be vacuumed before %u more MultiXactId is used",
2412 : : "database \"%s\" must be vacuumed before %u more MultiXactIds are used",
2413 : : multiWrapLimit - curMulti,
2414 : : oldest_datname,
2415 : : multiWrapLimit - curMulti),
2416 : : errhint("To avoid MultiXactId assignment failures, execute a database-wide VACUUM in that database.\n"
2417 : : "You might also need to commit or roll back old prepared transactions, or drop stale replication slots.")));
2418 : : else
2419 [ # # ]: 0 : ereport(WARNING,
2420 : : (errmsg_plural("database with OID %u must be vacuumed before %u more MultiXactId is used",
2421 : : "database with OID %u must be vacuumed before %u more MultiXactIds are used",
2422 : : multiWrapLimit - curMulti,
2423 : : oldest_datoid,
2424 : : multiWrapLimit - curMulti),
2425 : : errhint("To avoid MultiXactId assignment failures, execute a database-wide VACUUM in that database.\n"
2426 : : "You might also need to commit or roll back old prepared transactions, or drop stale replication slots.")));
2427 : : }
2428 : : }
2429 : :
2430 : : /*
2431 : : * Ensure the next-to-be-assigned MultiXactId is at least minMulti,
2432 : : * and similarly nextOffset is at least minMultiOffset.
2433 : : *
2434 : : * This is used when we can determine minimum safe values from an XLog
2435 : : * record (either an on-line checkpoint or an mxact creation log entry).
2436 : : * Although this is only called during XLog replay, we take the lock in case
2437 : : * any hot-standby backends are examining the values.
2438 : : */
2439 : : void
7395 tgl@sss.pgh.pa.us 2440 :CBC 666 : MultiXactAdvanceNextMXact(MultiXactId minMulti,
2441 : : MultiXactOffset minMultiOffset)
2442 : : {
4961 2443 : 666 : LWLockAcquire(MultiXactGenLock, LW_EXCLUSIVE);
7436 2444 [ + + ]: 666 : if (MultiXactIdPrecedes(MultiXactState->nextMXact, minMulti))
2445 : : {
2446 : : debug_elog3(DEBUG2, "MultiXact: setting next multi to %u", minMulti);
2447 : 2 : MultiXactState->nextMXact = minMulti;
2448 : : }
7395 2449 [ + + ]: 666 : if (MultiXactOffsetPrecedes(MultiXactState->nextOffset, minMultiOffset))
2450 : : {
2451 : : debug_elog3(DEBUG2, "MultiXact: setting next offset to %u",
2452 : : minMultiOffset);
2453 : 2 : MultiXactState->nextOffset = minMultiOffset;
2454 : : }
4961 2455 : 666 : LWLockRelease(MultiXactGenLock);
7436 2456 : 666 : }
2457 : :
2458 : : /*
2459 : : * Update our oldestMultiXactId value, but only if it's more recent than what
2460 : : * we had.
2461 : : *
2462 : : * This may only be called during WAL replay.
2463 : : */
2464 : : void
4609 alvherre@alvh.no-ip. 2465 : 694 : MultiXactAdvanceOldest(MultiXactId oldestMulti, Oid oldestMultiDB)
2466 : : {
3633 andres@anarazel.de 2467 [ - + ]: 694 : Assert(InRecovery);
2468 : :
4609 alvherre@alvh.no-ip. 2469 [ - + ]: 694 : if (MultiXactIdPrecedes(MultiXactState->oldestMultiXactId, oldestMulti))
3098 tgl@sss.pgh.pa.us 2470 :UBC 0 : SetMultiXactIdLimit(oldestMulti, oldestMultiDB, false);
4089 alvherre@alvh.no-ip. 2471 :CBC 694 : }
2472 : :
2473 : : /*
2474 : : * Make sure that MultiXactOffset has room for a newly-allocated MultiXactId.
2475 : : *
2476 : : * NB: this is called while holding MultiXactGenLock. We want it to be very
2477 : : * fast most of the time; even when it's not so fast, no actual I/O need
2478 : : * happen unless we're forced to write out a dirty log or xlog page to make
2479 : : * room in shared memory.
2480 : : */
2481 : : static void
7436 tgl@sss.pgh.pa.us 2482 : 294 : ExtendMultiXactOffset(MultiXactId multi)
2483 : : {
2484 : : int64 pageno;
2485 : : LWLock *lock;
2486 : :
2487 : : /*
2488 : : * No work except at first MultiXactId of a page. But beware: just after
2489 : : * wraparound, the first MultiXactId of page zero is FirstMultiXactId.
2490 : : */
2491 [ + - + + ]: 294 : if (MultiXactIdToOffsetEntry(multi) != 0 &&
2492 : : multi != FirstMultiXactId)
2493 : 284 : return;
2494 : :
2495 : 10 : pageno = MultiXactIdToOffsetPage(multi);
556 alvherre@alvh.no-ip. 2496 : 10 : lock = SimpleLruGetBankLock(MultiXactOffsetCtl, pageno);
2497 : :
2498 : 10 : LWLockAcquire(lock, LW_EXCLUSIVE);
2499 : :
2500 : : /* Zero the page and make a WAL entry about it */
61 alvherre@kurilemu.de 2501 :GNC 10 : SimpleLruZeroPage(MultiXactOffsetCtl, pageno);
2502 : 10 : XLogSimpleInsertInt64(RM_MULTIXACT_ID, XLOG_MULTIXACT_ZERO_OFF_PAGE,
2503 : : pageno);
2504 : :
556 alvherre@alvh.no-ip. 2505 :CBC 10 : LWLockRelease(lock);
2506 : : }
2507 : :
2508 : : /*
2509 : : * Make sure that MultiXactMember has room for the members of a newly-
2510 : : * allocated MultiXactId.
2511 : : *
2512 : : * Like the above routine, this is called while holding MultiXactGenLock;
2513 : : * same comments apply.
2514 : : */
2515 : : static void
7395 tgl@sss.pgh.pa.us 2516 : 294 : ExtendMultiXactMember(MultiXactOffset offset, int nmembers)
2517 : : {
2518 : : /*
2519 : : * It's possible that the members span more than one page of the members
2520 : : * file, so we loop to ensure we consider each page. The coding is not
2521 : : * optimal if the members span several pages, but that seems unusual
2522 : : * enough to not worry much about.
2523 : : */
2524 [ + + ]: 588 : while (nmembers > 0)
2525 : : {
2526 : : int flagsoff;
2527 : : int flagsbit;
2528 : : uint32 difference;
2529 : :
2530 : : /*
2531 : : * Only zero when at first entry of a page.
2532 : : */
4609 alvherre@alvh.no-ip. 2533 : 294 : flagsoff = MXOffsetToFlagsOffset(offset);
2534 : 294 : flagsbit = MXOffsetToFlagsBitShift(offset);
2535 [ + + + + ]: 294 : if (flagsoff == 0 && flagsbit == 0)
2536 : : {
2537 : : int64 pageno;
2538 : : LWLock *lock;
2539 : :
7395 tgl@sss.pgh.pa.us 2540 : 10 : pageno = MXOffsetToMemberPage(offset);
556 alvherre@alvh.no-ip. 2541 : 10 : lock = SimpleLruGetBankLock(MultiXactMemberCtl, pageno);
2542 : :
2543 : 10 : LWLockAcquire(lock, LW_EXCLUSIVE);
2544 : :
2545 : : /* Zero the page and make a WAL entry about it */
61 alvherre@kurilemu.de 2546 :GNC 10 : SimpleLruZeroPage(MultiXactMemberCtl, pageno);
2547 : 10 : XLogSimpleInsertInt64(RM_MULTIXACT_ID,
2548 : : XLOG_MULTIXACT_ZERO_MEM_PAGE, pageno);
2549 : :
556 alvherre@alvh.no-ip. 2550 :CBC 10 : LWLockRelease(lock);
2551 : : }
2552 : :
2553 : : /*
2554 : : * Compute the number of items till end of current page. Careful: if
2555 : : * addition of unsigned ints wraps around, we're at the last page of
2556 : : * the last segment; since that page holds a different number of items
2557 : : * than other pages, we need to do it differently.
2558 : : */
4107 2559 [ - + ]: 294 : if (offset + MAX_MEMBERS_IN_LAST_MEMBERS_PAGE < offset)
2560 : : {
2561 : : /*
2562 : : * This is the last page of the last segment; we can compute the
2563 : : * number of items left to allocate in it without modulo
2564 : : * arithmetic.
2565 : : */
4107 alvherre@alvh.no-ip. 2566 :UBC 0 : difference = MaxMultiXactOffset - offset + 1;
2567 : : }
2568 : : else
4265 alvherre@alvh.no-ip. 2569 :CBC 294 : difference = MULTIXACT_MEMBERS_PER_PAGE - offset % MULTIXACT_MEMBERS_PER_PAGE;
2570 : :
2571 : : /*
2572 : : * Advance to next page, taking care to properly handle the wraparound
2573 : : * case. OK if nmembers goes negative.
2574 : : */
4107 2575 : 294 : nmembers -= difference;
2576 : 294 : offset += difference;
2577 : : }
7436 tgl@sss.pgh.pa.us 2578 : 294 : }
2579 : :
2580 : : /*
2581 : : * GetOldestMultiXactId
2582 : : *
2583 : : * Return the oldest MultiXactId that's still possibly still seen as live by
2584 : : * any running transaction. Older ones might still exist on disk, but they no
2585 : : * longer have any running member transaction.
2586 : : *
2587 : : * It's not safe to truncate MultiXact SLRU segments on the value returned by
2588 : : * this function; however, it can be set as the new relminmxid for any table
2589 : : * that VACUUM knows has no remaining MXIDs < the same value. It is only safe
2590 : : * to truncate SLRUs when no table can possibly still have a referencing MXID.
2591 : : */
2592 : : MultiXactId
4609 alvherre@alvh.no-ip. 2593 : 46208 : GetOldestMultiXactId(void)
2594 : : {
2595 : : MultiXactId oldestMXact;
2596 : : MultiXactId nextMXact;
2597 : : int i;
2598 : :
2599 : : /*
2600 : : * This is the oldest valid value among all the OldestMemberMXactId[] and
2601 : : * OldestVisibleMXactId[] entries, or nextMXact if none are valid.
2602 : : */
7436 tgl@sss.pgh.pa.us 2603 : 46208 : LWLockAcquire(MultiXactGenLock, LW_SHARED);
2604 : :
2605 : : /*
2606 : : * We have to beware of the possibility that nextMXact is in the
2607 : : * wrapped-around state. We don't fix the counter itself here, but we
2608 : : * must be sure to use a valid value in our calculation.
2609 : : */
2610 : 46208 : nextMXact = MultiXactState->nextMXact;
2611 [ - + ]: 46208 : if (nextMXact < FirstMultiXactId)
7436 tgl@sss.pgh.pa.us 2612 :UBC 0 : nextMXact = FirstMultiXactId;
2613 : :
7436 tgl@sss.pgh.pa.us 2614 :CBC 46208 : oldestMXact = nextMXact;
552 heikki.linnakangas@i 2615 [ + + ]: 5524556 : for (i = 0; i < MaxOldestSlot; i++)
2616 : : {
2617 : : MultiXactId thisoldest;
2618 : :
7436 tgl@sss.pgh.pa.us 2619 : 5478348 : thisoldest = OldestMemberMXactId[i];
2620 [ + + + + ]: 5502798 : if (MultiXactIdIsValid(thisoldest) &&
2621 : 24450 : MultiXactIdPrecedes(thisoldest, oldestMXact))
2622 : 14 : oldestMXact = thisoldest;
2623 : 5478348 : thisoldest = OldestVisibleMXactId[i];
2624 [ + + + + ]: 5478449 : if (MultiXactIdIsValid(thisoldest) &&
2625 : 101 : MultiXactIdPrecedes(thisoldest, oldestMXact))
2626 : 2 : oldestMXact = thisoldest;
2627 : : }
2628 : :
2629 : 46208 : LWLockRelease(MultiXactGenLock);
2630 : :
4609 alvherre@alvh.no-ip. 2631 : 46208 : return oldestMXact;
2632 : : }
2633 : :
2634 : : /*
2635 : : * Determine how aggressively we need to vacuum in order to prevent member
2636 : : * wraparound.
2637 : : *
2638 : : * To do so determine what's the oldest member offset and install the limit
2639 : : * info in MultiXactState, where it can be used to prevent overrun of old data
2640 : : * in the members SLRU area.
2641 : : *
2642 : : * The return value is true if emergency autovacuum is required and false
2643 : : * otherwise.
2644 : : */
2645 : : static bool
3098 tgl@sss.pgh.pa.us 2646 : 930 : SetOffsetVacuumLimit(bool is_startup)
2647 : : {
2648 : : MultiXactId oldestMultiXactId;
2649 : : MultiXactId nextMXact;
3633 andres@anarazel.de 2650 : 930 : MultiXactOffset oldestOffset = 0; /* placate compiler */
2651 : : MultiXactOffset prevOldestOffset;
2652 : : MultiXactOffset nextOffset;
3746 rhaas@postgresql.org 2653 : 930 : bool oldestOffsetKnown = false;
2654 : : bool prevOldestOffsetKnown;
3633 andres@anarazel.de 2655 : 930 : MultiXactOffset offsetStopLimit = 0;
2656 : : MultiXactOffset prevOffsetStopLimit;
2657 : :
2658 : : /*
2659 : : * NB: Have to prevent concurrent truncation, we might otherwise try to
2660 : : * lookup an oldestMulti that's concurrently getting truncated away.
2661 : : */
2662 : 930 : LWLockAcquire(MultiXactTruncationLock, LW_SHARED);
2663 : :
2664 : : /* Read relevant fields from shared memory. */
3746 rhaas@postgresql.org 2665 : 930 : LWLockAcquire(MultiXactGenLock, LW_SHARED);
2666 : 930 : oldestMultiXactId = MultiXactState->oldestMultiXactId;
2667 : 930 : nextMXact = MultiXactState->nextMXact;
2668 : 930 : nextOffset = MultiXactState->nextOffset;
2669 : 930 : prevOldestOffsetKnown = MultiXactState->oldestOffsetKnown;
3633 andres@anarazel.de 2670 : 930 : prevOldestOffset = MultiXactState->oldestOffset;
3554 2671 : 930 : prevOffsetStopLimit = MultiXactState->offsetStopLimit;
3633 2672 [ - + ]: 930 : Assert(MultiXactState->finishedStartup);
3784 alvherre@alvh.no-ip. 2673 : 930 : LWLockRelease(MultiXactGenLock);
2674 : :
2675 : : /*
2676 : : * Determine the offset of the oldest multixact. Normally, we can read
2677 : : * the offset from the multixact itself, but there's an important special
2678 : : * case: if there are no multixacts in existence at all, oldestMXact
2679 : : * obviously can't point to one. It will instead point to the multixact
2680 : : * ID that will be assigned the next time one is needed.
2681 : : */
3746 rhaas@postgresql.org 2682 [ + + ]: 930 : if (oldestMultiXactId == nextMXact)
2683 : : {
2684 : : /*
2685 : : * When the next multixact gets created, it will be stored at the next
2686 : : * offset.
2687 : : */
2688 : 916 : oldestOffset = nextOffset;
2689 : 916 : oldestOffsetKnown = true;
2690 : : }
2691 : : else
2692 : : {
2693 : : /*
2694 : : * Figure out where the oldest existing multixact's offsets are
2695 : : * stored. Due to bugs in early release of PostgreSQL 9.3.X and 9.4.X,
2696 : : * the supposedly-earliest multixact might not really exist. We are
2697 : : * careful not to fail in that case.
2698 : : */
2699 : : oldestOffsetKnown =
2700 : 14 : find_multixact_start(oldestMultiXactId, &oldestOffset);
2701 : :
2702 [ + - ]: 14 : if (oldestOffsetKnown)
2703 [ - + ]: 14 : ereport(DEBUG1,
2704 : : (errmsg_internal("oldest MultiXactId member is at offset %u",
2705 : : oldestOffset)));
2706 : : else
3633 andres@anarazel.de 2707 [ # # ]:UBC 0 : ereport(LOG,
2708 : : (errmsg("MultiXact member wraparound protections are disabled because oldest checkpointed MultiXact %u does not exist on disk",
2709 : : oldestMultiXactId)));
2710 : : }
2711 : :
3633 andres@anarazel.de 2712 :CBC 930 : LWLockRelease(MultiXactTruncationLock);
2713 : :
2714 : : /*
2715 : : * If we can, compute limits (and install them MultiXactState) to prevent
2716 : : * overrun of old data in the members SLRU area. We can only do so if the
2717 : : * oldest offset is known though.
2718 : : */
2719 [ + - ]: 930 : if (oldestOffsetKnown)
2720 : : {
2721 : : /* move back to start of the corresponding segment */
2722 : 930 : offsetStopLimit = oldestOffset - (oldestOffset %
2723 : : (MULTIXACT_MEMBERS_PER_PAGE * SLRU_PAGES_PER_SEGMENT));
2724 : :
2725 : : /* always leave one segment before the wraparound point */
2726 : 930 : offsetStopLimit -= (MULTIXACT_MEMBERS_PER_PAGE * SLRU_PAGES_PER_SEGMENT);
2727 : :
3098 tgl@sss.pgh.pa.us 2728 [ + + - + ]: 930 : if (!prevOldestOffsetKnown && !is_startup)
3633 andres@anarazel.de 2729 [ # # ]:UBC 0 : ereport(LOG,
2730 : : (errmsg("MultiXact member wraparound protections are now enabled")));
2731 : :
3633 andres@anarazel.de 2732 [ + + ]:CBC 930 : ereport(DEBUG1,
2733 : : (errmsg_internal("MultiXact member stop limit is now %u based on MultiXact %u",
2734 : : offsetStopLimit, oldestMultiXactId)));
2735 : : }
3633 andres@anarazel.de 2736 [ # # ]:UBC 0 : else if (prevOldestOffsetKnown)
2737 : : {
2738 : : /*
2739 : : * If we failed to get the oldest offset this time, but we have a
2740 : : * value from a previous pass through this function, use the old
2741 : : * values rather than automatically forcing an emergency autovacuum
2742 : : * cycle again.
2743 : : */
3732 rhaas@postgresql.org 2744 : 0 : oldestOffset = prevOldestOffset;
2745 : 0 : oldestOffsetKnown = true;
3554 andres@anarazel.de 2746 : 0 : offsetStopLimit = prevOffsetStopLimit;
2747 : : }
2748 : :
2749 : : /* Install the computed values */
3633 andres@anarazel.de 2750 :CBC 930 : LWLockAcquire(MultiXactGenLock, LW_EXCLUSIVE);
2751 : 930 : MultiXactState->oldestOffset = oldestOffset;
2752 : 930 : MultiXactState->oldestOffsetKnown = oldestOffsetKnown;
2753 : 930 : MultiXactState->offsetStopLimit = offsetStopLimit;
2754 : 930 : LWLockRelease(MultiXactGenLock);
2755 : :
2756 : : /*
2757 : : * Do we need an emergency autovacuum? If we're not sure, assume yes.
2758 : : */
3746 rhaas@postgresql.org 2759 [ + - ]: 1860 : return !oldestOffsetKnown ||
2760 [ - + ]: 930 : (nextOffset - oldestOffset > MULTIXACT_MEMBER_SAFE_THRESHOLD);
2761 : : }
2762 : :
2763 : : /*
2764 : : * Return whether adding "distance" to "start" would move past "boundary".
2765 : : *
2766 : : * We use this to determine whether the addition is "wrapping around" the
2767 : : * boundary point, hence the name. The reason we don't want to use the regular
2768 : : * 2^31-modulo arithmetic here is that we want to be able to use the whole of
2769 : : * the 2^32-1 space here, allowing for more multixacts than would fit
2770 : : * otherwise.
2771 : : */
2772 : : static bool
3784 alvherre@alvh.no-ip. 2773 : 588 : MultiXactOffsetWouldWrap(MultiXactOffset boundary, MultiXactOffset start,
2774 : : uint32 distance)
2775 : : {
2776 : : MultiXactOffset finish;
2777 : :
2778 : : /*
2779 : : * Note that offset number 0 is not used (see GetMultiXactIdMembers), so
2780 : : * if the addition wraps around the UINT_MAX boundary, skip that value.
2781 : : */
2782 : 588 : finish = start + distance;
2783 [ - + ]: 588 : if (finish < start)
3784 alvherre@alvh.no-ip. 2784 :UBC 0 : finish++;
2785 : :
2786 : : /*-----------------------------------------------------------------------
2787 : : * When the boundary is numerically greater than the starting point, any
2788 : : * value numerically between the two is not wrapped:
2789 : : *
2790 : : * <----S----B---->
2791 : : * [---) = F wrapped past B (and UINT_MAX)
2792 : : * [---) = F not wrapped
2793 : : * [----] = F wrapped past B
2794 : : *
2795 : : * When the boundary is numerically less than the starting point (i.e. the
2796 : : * UINT_MAX wraparound occurs somewhere in between) then all values in
2797 : : * between are wrapped:
2798 : : *
2799 : : * <----B----S---->
2800 : : * [---) = F not wrapped past B (but wrapped past UINT_MAX)
2801 : : * [---) = F wrapped past B (and UINT_MAX)
2802 : : * [----] = F not wrapped
2803 : : *-----------------------------------------------------------------------
2804 : : */
3784 alvherre@alvh.no-ip. 2805 [ + - ]:CBC 588 : if (start < boundary)
2806 [ + - - + ]: 588 : return finish >= boundary || finish < start;
2807 : : else
3784 alvherre@alvh.no-ip. 2808 [ # # # # ]:UBC 0 : return finish >= boundary && finish < start;
2809 : : }
2810 : :
2811 : : /*
2812 : : * Find the starting offset of the given MultiXactId.
2813 : : *
2814 : : * Returns false if the file containing the multi does not exist on disk.
2815 : : * Otherwise, returns true and sets *result to the starting member offset.
2816 : : *
2817 : : * This function does not prevent concurrent truncation, so if that's
2818 : : * required, the caller has to protect against that.
2819 : : */
2820 : : static bool
3746 rhaas@postgresql.org 2821 :CBC 14 : find_multixact_start(MultiXactId multi, MultiXactOffset *result)
2822 : : {
2823 : : MultiXactOffset offset;
2824 : : int64 pageno;
2825 : : int entryno;
2826 : : int slotno;
2827 : : MultiXactOffset *offptr;
2828 : :
3633 andres@anarazel.de 2829 [ - + ]: 14 : Assert(MultiXactState->finishedStartup);
2830 : :
3784 alvherre@alvh.no-ip. 2831 : 14 : pageno = MultiXactIdToOffsetPage(multi);
2832 : 14 : entryno = MultiXactIdToOffsetEntry(multi);
2833 : :
2834 : : /*
2835 : : * Write out dirty data, so PhysicalPageExists can work correctly.
2836 : : */
1807 tmunro@postgresql.or 2837 : 14 : SimpleLruWriteAll(MultiXactOffsetCtl, true);
2838 : 14 : SimpleLruWriteAll(MultiXactMemberCtl, true);
2839 : :
3746 rhaas@postgresql.org 2840 [ - + ]: 14 : if (!SimpleLruDoesPhysicalPageExist(MultiXactOffsetCtl, pageno))
3746 rhaas@postgresql.org 2841 :UBC 0 : return false;
2842 : :
2843 : : /* lock is acquired by SimpleLruReadPage_ReadOnly */
3784 alvherre@alvh.no-ip. 2844 :CBC 14 : slotno = SimpleLruReadPage_ReadOnly(MultiXactOffsetCtl, pageno, multi);
2845 : 14 : offptr = (MultiXactOffset *) MultiXactOffsetCtl->shared->page_buffer[slotno];
2846 : 14 : offptr += entryno;
2847 : 14 : offset = *offptr;
556 2848 : 14 : LWLockRelease(SimpleLruGetBankLock(MultiXactOffsetCtl, pageno));
2849 : :
3746 rhaas@postgresql.org 2850 : 14 : *result = offset;
2851 : 14 : return true;
2852 : : }
2853 : :
2854 : : /*
2855 : : * GetMultiXactInfo
2856 : : *
2857 : : * Reports the current MultiXact state through its output parameters:
2858 : : * multixacts: Number of MultiXacts (nextMultiXactId - oldestMultiXactId)
2859 : : * members: Number of member entries (nextOffset - oldestOffset)
2860 : : * oldestMultiXactId: Oldest MultiXact ID still in use
2861 : : * oldestOffset: Oldest offset still in use
2862 : : *
 * All shared fields are read under a single shared acquisition of
 * MultiXactGenLock; the differences are computed after the lock is released.
 *
2863 : : * Returns true on success. Returns false if the oldest offset is unknown;
 * in that case *multixacts, *members and *oldestOffset are set to 0 and
 * *oldestMultiXactId is set to InvalidMultiXactId.
2864 : : */
2865 : : bool
18 michael@paquier.xyz 2866 :GNC 13353 : GetMultiXactInfo(uint32 *multixacts, MultiXactOffset *members,
2867 : : MultiXactId *oldestMultiXactId, MultiXactOffset *oldestOffset)
2868 : : {
2869 : : MultiXactOffset nextOffset;
2870 : : MultiXactId nextMultiXactId;
2871 : : bool oldestOffsetKnown;
2872 : :
/* Copy the shared counters while holding the lock in shared mode. */
3774 rhaas@postgresql.org 2873 :CBC 13353 : LWLockAcquire(MultiXactGenLock, LW_SHARED);
2874 : 13353 : nextOffset = MultiXactState->nextOffset;
18 michael@paquier.xyz 2875 :GNC 13353 : *oldestMultiXactId = MultiXactState->oldestMultiXactId;
3774 rhaas@postgresql.org 2876 :CBC 13353 : nextMultiXactId = MultiXactState->nextMXact;
18 michael@paquier.xyz 2877 :GNC 13353 : *oldestOffset = MultiXactState->oldestOffset;
3746 rhaas@postgresql.org 2878 :CBC 13353 : oldestOffsetKnown = MultiXactState->oldestOffsetKnown;
3774 2879 : 13353 : LWLockRelease(MultiXactGenLock);
2880 : :
/* Without a known oldest offset, overwrite every output with its "unknown" value. */
3746 2881 [ - + ]: 13353 : if (!oldestOffsetKnown)
2882 : : {
18 michael@paquier.xyz 2883 :UNC 0 : *members = 0;
2884 : 0 : *multixacts = 0;
2885 : 0 : *oldestMultiXactId = InvalidMultiXactId;
2886 : 0 : *oldestOffset = 0;
3746 rhaas@postgresql.org 2887 :UBC 0 : return false;
2888 : : }
2889 : :
/* Both counts are plain differences between the "next" and "oldest" values. */
18 michael@paquier.xyz 2890 :GNC 13353 : *members = nextOffset - *oldestOffset;
2891 : 13353 : *multixacts = nextMultiXactId - *oldestMultiXactId;
3746 rhaas@postgresql.org 2892 :CBC 13353 : return true;
2893 : : }
2894 : :
2895 : : /*
2896 : : * Multixact members can be removed once the multixacts that refer to them
2897 : : * are older than every datminmxid. autovacuum_multixact_freeze_max_age and
2898 : : * vacuum_multixact_freeze_table_age work together to make sure we never have
2899 : : * too many multixacts; we hope that, at least under normal circumstances,
2900 : : * this will also be sufficient to keep us from using too many offsets.
2901 : : * However, if the average multixact has many members, we might exhaust the
2902 : : * members space while still using few enough members that these limits fail
2903 : : * to trigger relminmxid advancement by VACUUM. At that point, we'd have no
2904 : : * choice but to start failing multixact-creating operations with an error.
2905 : : *
2906 : : * To prevent that, if more than a threshold portion of the members space is
2907 : : * used, we effectively reduce autovacuum_multixact_freeze_max_age
2908 : : * to a value just less than the number of multixacts in use. We hope that
2909 : : * this will quickly trigger autovacuuming on the table or tables with the
2910 : : * oldest relminmxid, thus allowing datminmxid values to advance and removing
2911 : : * some members.
2912 : : *
2913 : : * As the fraction of the member space currently in use grows, we become
2914 : : * more aggressive in clamping this value. That not only causes autovacuum
2915 : : * to ramp up, but also makes any manual vacuums the user issues more
2916 : : * aggressive. This happens because vacuum_get_cutoffs() will clamp the
2917 : : * freeze table and the minimum freeze age cutoffs based on the effective
2918 : : * autovacuum_multixact_freeze_max_age this function returns. In the worst
2919 : : * case, we'll clamp the freeze_max_age to zero, and every vacuum of any
2920 : : * table will freeze every multixact.
 *
 * Return value:
 *   0 if GetMultiXactInfo() cannot determine the member usage;
 *   autovacuum_multixact_freeze_max_age if the member count is at or below
 *   MULTIXACT_MEMBER_SAFE_THRESHOLD;
 *   otherwise the number of multixacts minus the computed number of victim
 *   multixacts, floored at 0 and capped at
 *   autovacuum_multixact_freeze_max_age.
2921 : : */
2922 : : int
3774 2923 : 13353 : MultiXactMemberFreezeThreshold(void)
2924 : : {
2925 : : MultiXactOffset members;
2926 : : uint32 multixacts;
2927 : : uint32 victim_multixacts;
2928 : : double fraction;
2929 : : int result;
/* The next two are only receivers for GetMultiXactInfo(); they are not used further here. */
2930 : : MultiXactId oldestMultiXactId;
2931 : : MultiXactOffset oldestOffset;
2932 : :
2933 : : /* If we can't determine member space utilization, assume the worst. */
18 michael@paquier.xyz 2934 [ - + ]:GNC 13353 : if (!GetMultiXactInfo(&multixacts, &members, &oldestMultiXactId, &oldestOffset))
3746 rhaas@postgresql.org 2935 :UBC 0 : return 0;
2936 : :
2937 : : /* If member space utilization is low, no special action is required. */
3774 rhaas@postgresql.org 2938 [ + - ]:CBC 13353 : if (members <= MULTIXACT_MEMBER_SAFE_THRESHOLD)
2939 : 13353 : return autovacuum_multixact_freeze_max_age;
2940 : :
2941 : : /*
2942 : : * Compute a target for relminmxid advancement. The number of multixacts
2943 : : * we try to eliminate from the system is based on how far we are past
2944 : : * MULTIXACT_MEMBER_SAFE_THRESHOLD.
 *
 * fraction is the distance past the safe threshold divided by the width of
 * the safe-to-danger range, so it is 0 at the safe threshold and 1.0 at the
 * danger threshold.
2945 : : */
3774 rhaas@postgresql.org 2946 :UBC 0 : fraction = (double) (members - MULTIXACT_MEMBER_SAFE_THRESHOLD) /
2947 : : (MULTIXACT_MEMBER_DANGER_THRESHOLD - MULTIXACT_MEMBER_SAFE_THRESHOLD);
2948 : 0 : victim_multixacts = multixacts * fraction;
2949 : :
2950 : : /*
 * fraction could be > 1.0, but lowest possible freeze age is zero.
 *
 * NOTE(review): the product above is a double that is converted to uint32
 * on assignment. If it can exceed the uint32 range, that conversion is not
 * defined by the C standard and this comparison would not be a reliable
 * guard -- confirm that the threshold macros bound the product suitably.
 */
2951 [ # # ]: 0 : if (victim_multixacts > multixacts)
2952 : 0 : return 0;
450 heikki.linnakangas@i 2953 : 0 : result = multixacts - victim_multixacts;
2954 : :
2955 : : /*
2956 : : * Clamp to autovacuum_multixact_freeze_max_age, so that we never make
2957 : : * autovacuum less aggressive than it would otherwise be.
2958 : : */
2959 : 0 : return Min(result, autovacuum_multixact_freeze_max_age);
2960 : : }
2961 : :
/*
 * Accumulator passed as the callback argument to SlruScanDirCbFindEarliest.
 */
2962 : : typedef struct mxtruncinfo
2963 : : {
2964 : : int64 earliestExistingPage; /* earliest page seen so far; -1 means none seen yet */
2965 : : } mxtruncinfo;
2966 : :
2967 : : /*
2968 : : * SlruScanDirectory callback
2969 : : * This callback determines the earliest existing page number.
 *
 * "data" points to an mxtruncinfo. Its earliestExistingPage is replaced by
 * segpage when it is still -1 (nothing recorded yet) or when
 * ctl->PagePrecedes() reports that segpage precedes the recorded page.
 * "filename" is not used.
 *
 * Always returns false.
2970 : : */
2971 : : static bool
647 akorotkov@postgresql 2972 : 0 : SlruScanDirCbFindEarliest(SlruCtl ctl, char *filename, int64 segpage, void *data)
2973 : : {
4483 bruce@momjian.us 2974 : 0 : mxtruncinfo *trunc = (mxtruncinfo *) data;
2975 : :
/* Take the first page unconditionally, later ones only if they precede it. */
4609 alvherre@alvh.no-ip. 2976 [ # # # # ]: 0 : if (trunc->earliestExistingPage == -1 ||
2977 : 0 : ctl->PagePrecedes(segpage, trunc->earliestExistingPage))
2978 : : {
2979 : 0 : trunc->earliestExistingPage = segpage;
2980 : : }
2981 : :
4483 bruce@momjian.us 2982 : 0 : return false; /* keep going */
2983 : : }
2984 : :
2985 : :
2986 : : /*
2987 : : * Delete members segments [oldest, newOldest)
2988 : : *
2989 : : * The members SLRU can, in contrast to the offsets one, be filled to almost
2990 : : * the full range at once. This means SimpleLruTruncate() can't trivially be
2991 : : * used - instead the to-be-deleted range is computed using the offsets
2992 : : * SLRU. C.f. TruncateMultiXact().
 *
 * oldestOffset and newOldestOffset are mapped to segment numbers with
 * MXOffsetToMemberSegment(). Segments are deleted one by one, starting at
 * the segment of oldestOffset and stopping before the segment of
 * newOldestOffset; the walk wraps from the segment of MaxMultiXactOffset
 * back to segment 0. Nothing is deleted when both offsets map to the same
 * segment.
2993 : : */
2994 : : static void
3633 andres@anarazel.de 2995 : 0 : PerformMembersTruncation(MultiXactOffset oldestOffset, MultiXactOffset newOldestOffset)
2996 : : {
383 michael@paquier.xyz 2997 : 0 : const int64 maxsegment = MXOffsetToMemberSegment(MaxMultiXactOffset);
2998 : 0 : int64 startsegment = MXOffsetToMemberSegment(oldestOffset);
2999 : 0 : int64 endsegment = MXOffsetToMemberSegment(newOldestOffset);
3000 : 0 : int64 segment = startsegment;
3001 : :
3002 : : /*
3003 : : * Delete all the segments but the last one. The last segment can still
3004 : : * contain, possibly partially, valid data.
3005 : : */
3633 andres@anarazel.de 3006 [ # # ]: 0 : while (segment != endsegment)
3007 : : {
161 peter@eisentraut.org 3008 [ # # ]: 0 : elog(DEBUG2, "truncating multixact members segment %" PRIx64,
3009 : : segment);
3633 andres@anarazel.de 3010 : 0 : SlruDeleteSegment(MultiXactMemberCtl, segment);
3011 : :
3012 : : /* move to next segment, handling wraparound correctly */
3013 [ # # ]: 0 : if (segment == maxsegment)
3014 : 0 : segment = 0;
3015 : : else
3016 : 0 : segment += 1;
3017 : : }
3018 : 0 : }
3019 : :
3020 : : /*
3021 : : * Delete offsets segments [oldest, newOldest)
 *
 * The cutoff handed to SimpleLruTruncate() is the offsets page that holds
 * the multixact just before newOldestMulti. The oldestMulti argument is not
 * used in the body.
3022 : : */
3023 : : static void
3024 : 0 : PerformOffsetsTruncation(MultiXactId oldestMulti, MultiXactId newOldestMulti)
3025 : : {
3026 : : /*
3027 : : * We step back one multixact to avoid passing a cutoff page that hasn't
3028 : : * been created yet in the rare case that oldestMulti would be the first
3029 : : * item on a page and oldestMulti == nextMulti. In that case, if we
3030 : : * didn't subtract one, we'd trigger SimpleLruTruncate's wraparound
3031 : : * detection.
 *
 * NOTE(review): the step back is applied to newOldestMulti, so
 * "oldestMulti" above presumably means the new oldest value -- confirm.
3032 : : */
3033 : 0 : SimpleLruTruncate(MultiXactOffsetCtl,
3034 : : MultiXactIdToOffsetPage(PreviousMultiXactId(newOldestMulti)));
3035 : 0 : }
3036 : :
3037 : : /*
3038 : : * Remove all MultiXactOffset and MultiXactMember segments before the oldest
3039 : : * ones still of interest.
3040 : : *
3041 : : * This is only called on a primary as part of vacuum (via
3042 : : * vac_truncate_clog()). During recovery truncation is done by replaying
3043 : : * truncation WAL records logged here.
3044 : : *
3045 : : * newOldestMulti is the oldest currently required multixact, newOldestMultiDB
3046 : : * is one of the databases preventing newOldestMulti from increasing.
 *
 * The function returns without truncating anything when newOldestMulti
 * precedes or equals the current oldestMultiXactId, when the current oldest
 * multixact precedes the earliest one found on disk, or when the member
 * offset of either boundary multixact cannot be looked up.
 * MultiXactTruncationLock is held exclusively throughout and is released on
 * every return path.
3047 : : */
3048 : : void
3633 andres@anarazel.de 3049 :CBC 98 : TruncateMultiXact(MultiXactId newOldestMulti, Oid newOldestMultiDB)
3050 : : {
3051 : : MultiXactId oldestMulti;
3052 : : MultiXactId nextMulti;
3053 : : MultiXactOffset newOldestOffset;
3054 : : MultiXactOffset oldestOffset;
3055 : : MultiXactOffset nextOffset;
3056 : : mxtruncinfo trunc;
3057 : : MultiXactId earliest;
3058 : :
3059 [ - + ]: 98 : Assert(!RecoveryInProgress());
3060 [ - + ]: 98 : Assert(MultiXactState->finishedStartup);
3061 : :
3062 : : /*
3063 : : * We can only allow one truncation to happen at once. Otherwise parts of
3064 : : * members might vanish while we're doing lookups or similar. There's no
3065 : : * need to have an interlock with creating new multis or such, since those
3066 : : * are constrained by the limits (which only grow, never shrink).
3067 : : */
3068 : 98 : LWLockAcquire(MultiXactTruncationLock, LW_EXCLUSIVE);
3069 : :
/* Copy the current next/oldest values under the generation lock. */
4089 alvherre@alvh.no-ip. 3070 : 98 : LWLockAcquire(MultiXactGenLock, LW_SHARED);
3633 andres@anarazel.de 3071 : 98 : nextMulti = MultiXactState->nextMXact;
3746 rhaas@postgresql.org 3072 : 98 : nextOffset = MultiXactState->nextOffset;
3633 andres@anarazel.de 3073 : 98 : oldestMulti = MultiXactState->oldestMultiXactId;
4089 alvherre@alvh.no-ip. 3074 : 98 : LWLockRelease(MultiXactGenLock);
3633 andres@anarazel.de 3075 [ - + ]: 98 : Assert(MultiXactIdIsValid(oldestMulti));
3076 : :
3077 : : /*
3078 : : * Make sure to only attempt truncation if there are values to truncate
3079 : : * away. In normal processing values shouldn't go backwards, but there
3080 : : * are some corner cases (due to bugs) where that's possible.
3081 : : */
3082 [ + - ]: 98 : if (MultiXactIdPrecedesOrEquals(newOldestMulti, oldestMulti))
3083 : : {
3084 : 98 : LWLockRelease(MultiXactTruncationLock);
3085 : 98 : return;
3086 : : }
3087 : :
3088 : : /*
3089 : : * Note we can't just plow ahead with the truncation; it's possible that
3090 : : * there are no segments to truncate, which is a problem because we are
3091 : : * going to attempt to read the offsets page to determine where to
3092 : : * truncate the members SLRU. So we first scan the directory to determine
3093 : : * the earliest offsets page number that we can read without error.
3094 : : *
3095 : : * When nextMXact is less than one segment away from multiWrapLimit,
3096 : : * SlruScanDirCbFindEarliest can find some early segment other than the
3097 : : * actual earliest. (MultiXactOffsetPagePrecedes(EARLIEST, LATEST)
3098 : : * returns false, because not all pairs of entries have the same answer.)
3099 : : * That can also arise when an earlier truncation attempt failed unlink()
3100 : : * or returned early from this function. The only consequence is
3101 : : * returning early, which wastes space that we could have liberated.
3102 : : *
3103 : : * NB: It's also possible that the page that oldestMulti is on has already
3104 : : * been truncated away, and we crashed before updating oldestMulti.
3105 : : */
4609 alvherre@alvh.no-ip. 3106 :UBC 0 : trunc.earliestExistingPage = -1;
3107 : 0 : SlruScanDirectory(MultiXactOffsetCtl, SlruScanDirCbFindEarliest, &trunc);
/*
 * Convert the earliest page to the first multixact on it, but never below
 * FirstMultiXactId.
 *
 * NOTE(review): if the scan reports no segment at all, earliestExistingPage
 * is still -1 when it is multiplied here -- confirm that case behaves as
 * intended.
 */
3108 : 0 : earliest = trunc.earliestExistingPage * MULTIXACT_OFFSETS_PER_PAGE;
4089 3109 [ # # ]: 0 : if (earliest < FirstMultiXactId)
3110 : 0 : earliest = FirstMultiXactId;
3111 : :
3112 : : /* If there's nothing to remove, we can bail out early. */
3633 andres@anarazel.de 3113 [ # # ]: 0 : if (MultiXactIdPrecedes(oldestMulti, earliest))
3114 : : {
3115 : 0 : LWLockRelease(MultiXactTruncationLock);
7436 tgl@sss.pgh.pa.us 3116 : 0 : return;
3117 : : }
3118 : :
3119 : : /*
3120 : : * First, compute the safe truncation point for MultiXactMember. This is
3121 : : * the starting offset of the oldest multixact.
3122 : : *
3123 : : * Hopefully, find_multixact_start will always work here, because we've
3124 : : * already checked that it doesn't precede the earliest MultiXact on disk.
3125 : : * But if it fails, don't truncate anything, and log a message.
3126 : : */
3633 andres@anarazel.de 3127 [ # # ]: 0 : if (oldestMulti == nextMulti)
3128 : : {
3129 : : /* there are NO MultiXacts */
3130 : 0 : oldestOffset = nextOffset;
3131 : : }
3132 [ # # ]: 0 : else if (!find_multixact_start(oldestMulti, &oldestOffset))
3133 : : {
3746 rhaas@postgresql.org 3134 [ # # ]: 0 : ereport(LOG,
3135 : : (errmsg("oldest MultiXact %u not found, earliest MultiXact %u, skipping truncation",
3136 : : oldestMulti, earliest)));
3633 andres@anarazel.de 3137 : 0 : LWLockRelease(MultiXactTruncationLock);
3746 rhaas@postgresql.org 3138 : 0 : return;
3139 : : }
3140 : :
3141 : : /*
3142 : : * Secondly compute up to where to truncate. Look up the corresponding
3143 : : * member offset for newOldestMulti for that.
3144 : : */
3633 andres@anarazel.de 3145 [ # # ]: 0 : if (newOldestMulti == nextMulti)
3146 : : {
3147 : : /* there are NO MultiXacts */
3148 : 0 : newOldestOffset = nextOffset;
3149 : : }
3150 [ # # ]: 0 : else if (!find_multixact_start(newOldestMulti, &newOldestOffset))
3151 : : {
3152 [ # # ]: 0 : ereport(LOG,
3153 : : (errmsg("cannot truncate up to MultiXact %u because it does not exist on disk, skipping truncation",
3154 : : newOldestMulti)));
3155 : 0 : LWLockRelease(MultiXactTruncationLock);
3156 : 0 : return;
3157 : : }
3158 : :
3159 [ # # ]: 0 : elog(DEBUG1, "performing multixact truncation: "
3160 : : "offsets [%u, %u), offsets segments [%" PRIx64 ", %" PRIx64 "), "
3161 : : "members [%u, %u), members segments [%" PRIx64 ", %" PRIx64 ")",
3162 : : oldestMulti, newOldestMulti,
3163 : : MultiXactIdToOffsetSegment(oldestMulti),
3164 : : MultiXactIdToOffsetSegment(newOldestMulti),
3165 : : oldestOffset, newOldestOffset,
3166 : : MXOffsetToMemberSegment(oldestOffset),
3167 : : MXOffsetToMemberSegment(newOldestOffset));
3168 : :
3169 : : /*
3170 : : * Do truncation, and the WAL logging of the truncation, in a critical
3171 : : * section. That way offsets/members cannot get out of sync anymore, i.e.
3172 : : * once consistent the newOldestMulti will always exist in members, even
3173 : : * if we crashed in the wrong moment.
3174 : : */
3175 : 0 : START_CRIT_SECTION();
3176 : :
3177 : : /*
3178 : : * Prevent checkpoints from being scheduled concurrently. This is critical
3179 : : * because otherwise a truncation record might not be replayed after a
3180 : : * crash/basebackup, even though the state of the data directory would
3181 : : * require it.
3182 : : */
1247 rhaas@postgresql.org 3183 [ # # ]: 0 : Assert((MyProc->delayChkptFlags & DELAY_CHKPT_START) == 0);
3184 : 0 : MyProc->delayChkptFlags |= DELAY_CHKPT_START;
3185 : :
3186 : : /* WAL log truncation */
3633 andres@anarazel.de 3187 : 0 : WriteMTruncateXlogRec(newOldestMultiDB,
3188 : : oldestMulti, newOldestMulti,
3189 : : oldestOffset, newOldestOffset);
3190 : :
3191 : : /*
3192 : : * Update in-memory limits before performing the truncation, while inside
3193 : : * the critical section: Have to do it before truncation, to prevent
3194 : : * concurrent lookups of those values. Has to be inside the critical
3195 : : * section as otherwise a future call to this function would error out,
3196 : : * while looking up the oldest member in offsets, if our caller crashes
3197 : : * before updating the limits.
3198 : : */
3199 : 0 : LWLockAcquire(MultiXactGenLock, LW_EXCLUSIVE);
3200 : 0 : MultiXactState->oldestMultiXactId = newOldestMulti;
3201 : 0 : MultiXactState->oldestMultiXactDB = newOldestMultiDB;
3202 : 0 : LWLockRelease(MultiXactGenLock);
3203 : :
3204 : : /* First truncate members */
3205 : 0 : PerformMembersTruncation(oldestOffset, newOldestOffset);
3206 : :
3207 : : /* Then offsets */
3208 : 0 : PerformOffsetsTruncation(oldestMulti, newOldestMulti);
3209 : :
/* Truncation is finished: clear the checkpoint-delay flag set above. */
1247 rhaas@postgresql.org 3210 : 0 : MyProc->delayChkptFlags &= ~DELAY_CHKPT_START;
3211 : :
3633 andres@anarazel.de 3212 [ # # ]: 0 : END_CRIT_SECTION();
3213 : 0 : LWLockRelease(MultiXactTruncationLock);
3214 : : }
3215 : :
3216 : : /*
3217 : : * Decide whether a MultiXactOffset page number is "older" for truncation
3218 : : * purposes. Analogous to CLOGPagePrecedes().
3219 : : *
3220 : : * Offsetting the values is optional, because MultiXactIdPrecedes() has
3221 : : * translational symmetry.
 *
 * Each page number is turned into a MultiXactId by multiplying it by
 * MULTIXACT_OFFSETS_PER_PAGE and adding FirstMultiXactId + 1. The result is
 * true only if the value for page1 precedes both the value for page2 and
 * that value plus MULTIXACT_OFFSETS_PER_PAGE - 1.
3222 : : */
3223 : : static bool
647 akorotkov@postgresql 3224 :CBC 40131 : MultiXactOffsetPagePrecedes(int64 page1, int64 page2)
3225 : : {
3226 : : MultiXactId multi1;
3227 : : MultiXactId multi2;
3228 : :
7436 tgl@sss.pgh.pa.us 3229 : 40131 : multi1 = ((MultiXactId) page1) * MULTIXACT_OFFSETS_PER_PAGE;
1694 noah@leadboat.com 3230 : 40131 : multi1 += FirstMultiXactId + 1;
7436 tgl@sss.pgh.pa.us 3231 : 40131 : multi2 = ((MultiXactId) page2) * MULTIXACT_OFFSETS_PER_PAGE;
1694 noah@leadboat.com 3232 : 40131 : multi2 += FirstMultiXactId + 1;
3233 : :
/* page1 must precede both ends of the range derived from page2. */
3234 [ + + + + ]: 66885 : return (MultiXactIdPrecedes(multi1, multi2) &&
3235 : 26754 : MultiXactIdPrecedes(multi1,
3236 : : multi2 + MULTIXACT_OFFSETS_PER_PAGE - 1));
3237 : : }
3238 : :
3239 : : /*
3240 : : * Decide whether a MultiXactMember page number is "older" for truncation
3241 : : * purposes. There is no "invalid offset number" so use the numbers verbatim.
 *
 * Each page number is multiplied by MULTIXACT_MEMBERS_PER_PAGE to get an
 * offset. The result is true only if the offset for page1 precedes both the
 * offset for page2 and that offset plus MULTIXACT_MEMBERS_PER_PAGE - 1.
3242 : : */
3243 : : static bool
647 akorotkov@postgresql 3244 :UBC 0 : MultiXactMemberPagePrecedes(int64 page1, int64 page2)
3245 : : {
3246 : : MultiXactOffset offset1;
3247 : : MultiXactOffset offset2;
3248 : :
7395 tgl@sss.pgh.pa.us 3249 : 0 : offset1 = ((MultiXactOffset) page1) * MULTIXACT_MEMBERS_PER_PAGE;
3250 : 0 : offset2 = ((MultiXactOffset) page2) * MULTIXACT_MEMBERS_PER_PAGE;
3251 : :
/* page1 must precede both ends of the range derived from page2. */
1694 noah@leadboat.com 3252 [ # # # # ]: 0 : return (MultiXactOffsetPrecedes(offset1, offset2) &&
3253 : 0 : MultiXactOffsetPrecedes(offset1,
3254 : : offset2 + MULTIXACT_MEMBERS_PER_PAGE - 1));
3255 : : }
3256 : :
3257 : : /*
3258 : : * Decide which of two MultiXactIds is earlier.
 *
 * Returns true if multi1 precedes multi2, judged by the sign of
 * (multi1 - multi2) after the difference is cast to int32.
3259 : : *
3260 : : * XXX do we need to do something special for InvalidMultiXactId?
3261 : : * (Doesn't look like it.)
3262 : : */
3263 : : bool
7436 tgl@sss.pgh.pa.us 3264 :CBC 586154 : MultiXactIdPrecedes(MultiXactId multi1, MultiXactId multi2)
3265 : : {
7266 bruce@momjian.us 3266 : 586154 : int32 diff = (int32) (multi1 - multi2);
3267 : :
7436 tgl@sss.pgh.pa.us 3268 : 586154 : return (diff < 0);
3269 : : }
3270 : :
3271 : : /*
3272 : : * MultiXactIdPrecedesOrEquals -- is multi1 logically <= multi2?
 *
 * Same comparison as MultiXactIdPrecedes(), except that a zero difference
 * also yields true.
3273 : : *
3274 : : * XXX do we need to do something special for InvalidMultiXactId?
3275 : : * (Doesn't look like it.)
3276 : : */
3277 : : bool
4300 alvherre@alvh.no-ip. 3278 : 5680 : MultiXactIdPrecedesOrEquals(MultiXactId multi1, MultiXactId multi2)
3279 : : {
3280 : 5680 : int32 diff = (int32) (multi1 - multi2);
3281 : :
3282 : 5680 : return (diff <= 0);
3283 : : }
3284 : :
3285 : :
3286 : : /*
3287 : : * Decide which of two offsets is earlier.
 *
 * Returns true if offset1 precedes offset2, judged by the sign of
 * (offset1 - offset2) after the difference is cast to int32.
3288 : : */
3289 : : static bool
7395 tgl@sss.pgh.pa.us 3290 : 666 : MultiXactOffsetPrecedes(MultiXactOffset offset1, MultiXactOffset offset2)
3291 : : {
7266 bruce@momjian.us 3292 : 666 : int32 diff = (int32) (offset1 - offset2);
3293 : :
7436 tgl@sss.pgh.pa.us 3294 : 666 : return (diff < 0);
3295 : : }
3296 : :
3297 : : /*
3298 : : * Write a TRUNCATE xlog record
3299 : : *
3300 : : * We must flush the xlog record to disk before returning --- see notes in
3301 : : * TruncateCLOG().
 *
 * The five arguments are copied unchanged into an xl_multixact_truncate
 * record: oldestMultiDB, the MultiXactId range startTruncOff/endTruncOff,
 * and the member offset range startTruncMemb/endTruncMemb. The record is
 * inserted as XLOG_MULTIXACT_TRUNCATE_ID and then flushed with XLogFlush().
3302 : : */
3303 : : static void
3633 andres@anarazel.de 3304 :UBC 0 : WriteMTruncateXlogRec(Oid oldestMultiDB,
3305 : : MultiXactId startTruncOff, MultiXactId endTruncOff,
3306 : : MultiXactOffset startTruncMemb, MultiXactOffset endTruncMemb)
3307 : : {
3308 : : XLogRecPtr recptr;
3309 : : xl_multixact_truncate xlrec;
3310 : :
3311 : 0 : xlrec.oldestMultiDB = oldestMultiDB;
3312 : :
3313 : 0 : xlrec.startTruncOff = startTruncOff;
3314 : 0 : xlrec.endTruncOff = endTruncOff;
3315 : :
3316 : 0 : xlrec.startTruncMemb = startTruncMemb;
3317 : 0 : xlrec.endTruncMemb = endTruncMemb;
3318 : :
3319 : 0 : XLogBeginInsert();
207 peter@eisentraut.org 3320 : 0 : XLogRegisterData(&xlrec, SizeOfMultiXactTruncate);
3633 andres@anarazel.de 3321 : 0 : recptr = XLogInsert(RM_MULTIXACT_ID, XLOG_MULTIXACT_TRUNCATE_ID);
/* Flush up to the record just inserted before returning. */
3322 : 0 : XLogFlush(recptr);
3323 : 0 : }
3324 : :
3325 : : /*
3326 : : * MULTIXACT resource manager's routines
 *
 * multixact_redo replays one MULTIXACT WAL record, dispatching on the info
 * bits left after masking out XLR_INFO_MASK:
 *   XLOG_MULTIXACT_ZERO_OFF_PAGE - zero and write the given offsets page
 *   XLOG_MULTIXACT_ZERO_MEM_PAGE - zero and write the given members page
 *   XLOG_MULTIXACT_CREATE_ID     - re-record the multixact and advance the
 *                                  next-multixact, next-offset and next-XID
 *                                  counters past what the record mentions
 *   XLOG_MULTIXACT_TRUNCATE_ID   - advance the limits and redo the members
 *                                  and offsets truncation
 * Any other value raises a PANIC.
3327 : : */
3328 : : void
3943 heikki.linnakangas@i 3329 :CBC 4 : multixact_redo(XLogReaderState *record)
3330 : : {
3331 : 4 : uint8 info = XLogRecGetInfo(record) & ~XLR_INFO_MASK;
3332 : :
3333 : : /* Backup blocks are not used in multixact records */
3334 [ - + ]: 4 : Assert(!XLogRecHasAnyBlockRefs(record));
3335 : :
7395 tgl@sss.pgh.pa.us 3336 [ + + ]: 4 : if (info == XLOG_MULTIXACT_ZERO_OFF_PAGE)
3337 : : {
3338 : : int64 pageno;
3339 : :
/* The record data is the page number; copy it out with memcpy. */
647 akorotkov@postgresql 3340 : 1 : memcpy(&pageno, XLogRecGetData(record), sizeof(pageno));
61 alvherre@kurilemu.de 3341 :GNC 1 : SimpleLruZeroAndWritePage(MultiXactOffsetCtl, pageno);
3342 : : }
7395 tgl@sss.pgh.pa.us 3343 [ + + ]:CBC 3 : else if (info == XLOG_MULTIXACT_ZERO_MEM_PAGE)
3344 : : {
3345 : : int64 pageno;
3346 : :
/* Same as above, for the members SLRU. */
647 akorotkov@postgresql 3347 : 1 : memcpy(&pageno, XLogRecGetData(record), sizeof(pageno));
61 alvherre@kurilemu.de 3348 :GNC 1 : SimpleLruZeroAndWritePage(MultiXactMemberCtl, pageno);
3349 : : }
7395 tgl@sss.pgh.pa.us 3350 [ + - ]:CBC 2 : else if (info == XLOG_MULTIXACT_CREATE_ID)
3351 : : {
4609 alvherre@alvh.no-ip. 3352 : 2 : xl_multixact_create *xlrec =
841 tgl@sss.pgh.pa.us 3353 : 2 : (xl_multixact_create *) XLogRecGetData(record);
3354 : : TransactionId max_xid;
3355 : : int i;
3356 : :
3357 : : /* Store the data back into the SLRU files */
4609 alvherre@alvh.no-ip. 3358 : 2 : RecordNewMultiXact(xlrec->mid, xlrec->moff, xlrec->nmembers,
3359 : 2 : xlrec->members);
3360 : :
3361 : : /* Make sure nextMXact/nextOffset are beyond what this record has */
3362 : 2 : MultiXactAdvanceNextMXact(xlrec->mid + 1,
3363 : 2 : xlrec->moff + xlrec->nmembers);
3364 : :
3365 : : /*
3366 : : * Make sure nextXid is beyond any XID mentioned in the record. This
3367 : : * should be unnecessary, since any XID found here ought to have other
3368 : : * evidence in the XLOG, but let's be safe.
 *
 * max_xid starts as the record's own XID and is raised to any member
 * XID that it precedes.
3369 : : */
3943 heikki.linnakangas@i 3370 : 2 : max_xid = XLogRecGetXid(record);
4609 alvherre@alvh.no-ip. 3371 [ + + ]: 6 : for (i = 0; i < xlrec->nmembers; i++)
3372 : : {
3373 [ - + ]: 4 : if (TransactionIdPrecedes(max_xid, xlrec->members[i].xid))
4609 alvherre@alvh.no-ip. 3374 :UBC 0 : max_xid = xlrec->members[i].xid;
3375 : : }
3376 : :
2354 tmunro@postgresql.or 3377 :CBC 2 : AdvanceNextFullTransactionIdPastXid(max_xid);
3378 : : }
3633 andres@anarazel.de 3379 [ # # ]:UBC 0 : else if (info == XLOG_MULTIXACT_TRUNCATE_ID)
3380 : : {
3381 : : xl_multixact_truncate xlrec;
3382 : : int64 pageno;
3383 : :
/* Copy the record payload into a local struct before using its fields. */
3384 : 0 : memcpy(&xlrec, XLogRecGetData(record),
3385 : : SizeOfMultiXactTruncate);
3386 : :
3387 [ # # ]: 0 : elog(DEBUG1, "replaying multixact truncation: "
3388 : : "offsets [%u, %u), offsets segments [%" PRIx64 ", %" PRIx64 "), "
3389 : : "members [%u, %u), members segments [%" PRIx64 ", %" PRIx64 ")",
3390 : : xlrec.startTruncOff, xlrec.endTruncOff,
3391 : : MultiXactIdToOffsetSegment(xlrec.startTruncOff),
3392 : : MultiXactIdToOffsetSegment(xlrec.endTruncOff),
3393 : : xlrec.startTruncMemb, xlrec.endTruncMemb,
3394 : : MXOffsetToMemberSegment(xlrec.startTruncMemb),
3395 : : MXOffsetToMemberSegment(xlrec.endTruncMemb));
3396 : :
3397 : : /* should not be required, but more than cheap enough */
3398 : 0 : LWLockAcquire(MultiXactTruncationLock, LW_EXCLUSIVE);
3399 : :
3400 : : /*
3401 : : * Advance the horizon values, so they're current at the end of
3402 : : * recovery.
3403 : : */
3098 tgl@sss.pgh.pa.us 3404 : 0 : SetMultiXactIdLimit(xlrec.endTruncOff, xlrec.oldestMultiDB, false);
3405 : :
/* Members are truncated first, then offsets, as in TruncateMultiXact(). */
3633 andres@anarazel.de 3406 : 0 : PerformMembersTruncation(xlrec.startTruncMemb, xlrec.endTruncMemb);
3407 : :
3408 : : /*
3409 : : * During XLOG replay, latest_page_number isn't necessarily set up
3410 : : * yet; insert a suitable value to bypass the sanity test in
3411 : : * SimpleLruTruncate.
3412 : : */
3413 : 0 : pageno = MultiXactIdToOffsetPage(xlrec.endTruncOff);
578 alvherre@alvh.no-ip. 3414 : 0 : pg_atomic_write_u64(&MultiXactOffsetCtl->shared->latest_page_number,
3415 : : pageno);
3633 andres@anarazel.de 3416 : 0 : PerformOffsetsTruncation(xlrec.startTruncOff, xlrec.endTruncOff);
3417 : :
3418 : 0 : LWLockRelease(MultiXactTruncationLock);
3419 : : }
3420 : : else
7395 tgl@sss.pgh.pa.us 3421 [ # # ]: 0 : elog(PANIC, "multixact_redo: unknown op code %u", info);
7395 tgl@sss.pgh.pa.us 3422 :CBC 4 : }
3423 : :
3424 : : /*
3425 : : * Entrypoint for sync.c to sync offsets files.
 *
 * Forwards ftag and path to SlruSyncFileTag() for the offsets SLRU and
 * returns its result unchanged.
3426 : : */
3427 : : int
1807 tmunro@postgresql.or 3428 :UBC 0 : multixactoffsetssyncfiletag(const FileTag *ftag, char *path)
3429 : : {
3430 : 0 : return SlruSyncFileTag(MultiXactOffsetCtl, ftag, path);
3431 : : }
3432 : :
3433 : : /*
3434 : : * Entrypoint for sync.c to sync members files.
 *
 * Forwards ftag and path to SlruSyncFileTag() for the members SLRU and
 * returns its result unchanged.
3435 : : */
3436 : : int
3437 : 0 : multixactmemberssyncfiletag(const FileTag *ftag, char *path)
3438 : : {
3439 : 0 : return SlruSyncFileTag(MultiXactMemberCtl, ftag, path);
3440 : : }
|