Age Owner Branch data TLA Line data Source code
1 : : /*-------------------------------------------------------------------------
2 : : *
3 : : * multixact.c
4 : : * PostgreSQL multi-transaction-log manager
5 : : *
6 : : * The pg_multixact manager is a pg_xact-like manager that stores an array of
7 : : * MultiXactMember for each MultiXactId. It is a fundamental part of the
8 : : * shared-row-lock implementation. Each MultiXactMember is composed of a
9 : : * TransactionId and a set of flag bits. The name is a bit historical:
10 : : * originally, a MultiXactId consisted of more than one TransactionId (except
11 : : * in rare corner cases), hence "multi". Nowadays, however, it's perfectly
12 : : * legitimate to have MultiXactIds that only include a single Xid.
13 : : *
14 : : * The meaning of the flag bits is opaque to this module, but they are mostly
15 : : * used in heapam.c to identify lock modes that each of the member transactions
16 : : * is holding on any given tuple. This module just contains support to store
17 : : * and retrieve the arrays.
18 : : *
19 : : * We use two SLRU areas, one for storing the offsets at which the data
20 : : * starts for each MultiXactId in the other one. This trick allows us to
21 : : * store variable length arrays of TransactionIds. (We could alternatively
22 : : * use one area containing counts and TransactionIds, with valid MultiXactId
23 : : * values pointing at slots containing counts; but that way seems less robust
24 : : * since it would get completely confused if someone inquired about a bogus
25 : : * MultiXactId that pointed to an intermediate slot containing an XID.)
26 : : *
27 : : * XLOG interactions: this module generates a record whenever a new OFFSETs or
28 : : * MEMBERs page is initialized to zeroes, as well as an
29 : : * XLOG_MULTIXACT_CREATE_ID record whenever a new MultiXactId is defined.
30 : : * This module ignores the WAL rule "write xlog before data," because it
31 : : * suffices that actions recording a MultiXactId in a heap xmax do follow that
32 : : * rule. The only way for the MXID to be referenced from any data page is for
33 : : * heap_lock_tuple() or heap_update() to have put it there, and each generates
34 : : * an XLOG record that must follow ours. The normal LSN interlock between the
35 : : * data page and that XLOG record will ensure that our XLOG record reaches
36 : : * disk first. If the SLRU members/offsets data reaches disk sooner than the
37 : : * XLOG records, we do not care; after recovery, no xmax will refer to it. On
38 : : * the flip side, to ensure that all referenced entries _do_ reach disk, this
39 : : * module's XLOG records completely rebuild the data entered since the last
40 : : * checkpoint. We flush and sync all dirty OFFSETs and MEMBERs pages to disk
41 : : * before each checkpoint is considered complete.
42 : : *
43 : : * Like clog.c, and unlike subtrans.c, we have to preserve state across
44 : : * crashes and ensure that MXID and offset numbering increases monotonically
45 : : * across a crash. We do this in the same way as it's done for transaction
46 : : * IDs: the WAL record is guaranteed to contain evidence of every MXID we
47 : : * could need to worry about, and we just make sure that at the end of
48 : : * replay, the next-MXID and next-offset counters are at least as large as
49 : : * anything we saw during replay.
50 : : *
51 : : * We are able to remove segments no longer necessary by carefully tracking
52 : : * each table's used values: during vacuum, any multixact older than a certain
53 : : * value is removed; the cutoff value is stored in pg_class. The minimum value
54 : : * across all tables in each database is stored in pg_database, and the global
55 : : * minimum across all databases is part of pg_control and is kept in shared
56 : : * memory. Whenever that minimum is advanced, the SLRUs are truncated.
57 : : *
58 : : * When new multixactid values are to be created, care is taken that the
59 : : * counter does not fall within the wraparound horizon considering the global
60 : : * minimum value.
61 : : *
62 : : * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
63 : : * Portions Copyright (c) 1994, Regents of the University of California
64 : : *
65 : : * src/backend/access/transam/multixact.c
66 : : *
67 : : *-------------------------------------------------------------------------
68 : : */
69 : : #include "postgres.h"
70 : :
71 : : #include "access/multixact.h"
72 : : #include "access/multixact_internal.h"
73 : : #include "access/slru.h"
74 : : #include "access/twophase.h"
75 : : #include "access/twophase_rmgr.h"
76 : : #include "access/xlog.h"
77 : : #include "access/xloginsert.h"
78 : : #include "access/xlogutils.h"
79 : : #include "miscadmin.h"
80 : : #include "pg_trace.h"
81 : : #include "pgstat.h"
82 : : #include "postmaster/autovacuum.h"
83 : : #include "storage/pmsignal.h"
84 : : #include "storage/proc.h"
85 : : #include "storage/procarray.h"
86 : : #include "utils/guc_hooks.h"
87 : : #include "utils/injection_point.h"
88 : : #include "utils/lsyscache.h"
89 : : #include "utils/memutils.h"
90 : :
91 : :
92 : : /*
93 : : * Thresholds used to keep members disk usage in check when multixids have a
94 : : * lot of members. When MULTIXACT_MEMBER_LOW_THRESHOLD is reached, vacuum
95 : : * starts freezing multixids more aggressively, even if the normal multixid
96 : : * age limits haven't been reached yet.
97 : : */
98 : : #define MULTIXACT_MEMBER_LOW_THRESHOLD UINT64CONST(2000000000)
99 : : #define MULTIXACT_MEMBER_HIGH_THRESHOLD UINT64CONST(4000000000)
100 : :
101 : : static inline MultiXactId
6 heikki.linnakangas@i 102 :GNC 107672 : NextMultiXactId(MultiXactId multi)
103 : : {
104 [ + + ]: 107672 : return multi == MaxMultiXactId ? FirstMultiXactId : multi + 1;
105 : : }
106 : :
107 : : static inline MultiXactId
550 heikki.linnakangas@i 108 :UBC 0 : PreviousMultiXactId(MultiXactId multi)
109 : : {
110 [ # # ]: 0 : return multi == FirstMultiXactId ? MaxMultiXactId : multi - 1;
111 : : }
112 : :
113 : : /*
114 : : * Links to shared-memory data structures for MultiXact control
115 : : */
116 : : static SlruCtlData MultiXactOffsetCtlData;
117 : : static SlruCtlData MultiXactMemberCtlData;
118 : :
119 : : #define MultiXactOffsetCtl (&MultiXactOffsetCtlData)
120 : : #define MultiXactMemberCtl (&MultiXactMemberCtlData)
121 : :
122 : : /*
123 : : * MultiXact state shared across all backends. All this state is protected
124 : : * by MultiXactGenLock. (We also use SLRU bank's lock of MultiXactOffset and
125 : : * MultiXactMember to guard accesses to the two sets of SLRU buffers. For
126 : : * concurrency's sake, we avoid holding more than one of these locks at a
127 : : * time.)
128 : : */
129 : : typedef struct MultiXactStateData
130 : : {
131 : : /* next-to-be-assigned MultiXactId */
132 : : MultiXactId nextMXact;
133 : :
134 : : /* next-to-be-assigned offset */
135 : : MultiXactOffset nextOffset;
136 : :
137 : : /* Have we completed multixact startup? */
138 : : bool finishedStartup;
139 : :
140 : : /*
141 : : * Oldest multixact that is still potentially referenced by a relation.
142 : : * Anything older than this should not be consulted. These values are
143 : : * updated by vacuum.
144 : : */
145 : : MultiXactId oldestMultiXactId;
146 : : Oid oldestMultiXactDB;
147 : :
148 : : /*
149 : : * Oldest multixact offset that is potentially referenced by a multixact
150 : : * referenced by a relation.
151 : : */
152 : : MultiXactOffset oldestOffset;
153 : :
154 : : /* support for anti-wraparound measures (NOTE(review): judging by the names these are the nextMXact thresholds for forcing vacuum, warning, refusing new multis, and wrapping; the code that sets them is not visible here -- confirm) */
155 : : MultiXactId multiVacLimit;
156 : : MultiXactId multiWarnLimit;
157 : : MultiXactId multiStopLimit;
158 : : MultiXactId multiWrapLimit;
159 : :
160 : : /*
161 : : * Per-backend data starts here. We have two arrays stored in the area
162 : : * immediately following the MultiXactStateData struct. Each is indexed by
163 : : * ProcNumber.
164 : : *
165 : : * In both arrays, there's a slot for all normal backends
166 : : * (0..MaxBackends-1) followed by a slot for max_prepared_xacts prepared
167 : : * transactions.
168 : : *
169 : : * OldestMemberMXactId[k] is the oldest MultiXactId each backend's current
170 : : * transaction(s) could possibly be a member of, or InvalidMultiXactId
171 : : * when the backend has no live transaction that could possibly be a
172 : : * member of a MultiXact. Each backend sets its entry to the current
173 : : * nextMXact counter just before first acquiring a shared lock in a given
174 : : * transaction, and clears it at transaction end. (This works because only
175 : : * during or after acquiring a shared lock could an XID possibly become a
176 : : * member of a MultiXact, and that MultiXact would have to be created
177 : : * during or after the lock acquisition.)
178 : : *
179 : : * OldestVisibleMXactId[k] is the oldest MultiXactId each backend's
180 : : * current transaction(s) think is potentially live, or InvalidMultiXactId
181 : : * when not in a transaction or not in a transaction that's paid any
182 : : * attention to MultiXacts yet. This is computed when first needed in a
183 : : * given transaction, and cleared at transaction end. We can compute it
184 : : * as the minimum of the valid OldestMemberMXactId[] entries at the time
185 : : * we compute it (using nextMXact if none are valid). Each backend is
186 : : * required not to attempt to access any SLRU data for MultiXactIds older
187 : : * than its own OldestVisibleMXactId[] setting; this is necessary because
188 : : * the relevant SLRU data can be concurrently truncated away.
189 : : *
190 : : * The oldest valid value among all of the OldestMemberMXactId[] and
191 : : * OldestVisibleMXactId[] entries is considered by vacuum as the earliest
192 : : * possible value still having any live member transaction -- OldestMxact.
193 : : * Any value older than that is typically removed from tuple headers, or
194 : : * "frozen" via being replaced with a new xmax. VACUUM can sometimes even
195 : : * remove an individual MultiXact xmax whose value is >= its OldestMxact
196 : : * cutoff, though typically only when no individual member XID is still
197 : : * running. See FreezeMultiXactId for full details.
198 : : *
199 : : * Whenever VACUUM advances relminmxid, then either its OldestMxact cutoff
200 : : * or the oldest extant Multi remaining in the table is used as the new
201 : : * pg_class.relminmxid value (whichever is earlier). The minimum of all
202 : : * relminmxid values in each database is stored in pg_database.datminmxid.
203 : : * In turn, the minimum of all of those values is stored in pg_control.
204 : : * This is used as the truncation point for pg_multixact when unneeded
205 : : * segments get removed by vac_truncate_clog() during vacuuming.
206 : : */
207 : : MultiXactId perBackendXactIds[FLEXIBLE_ARRAY_MEMBER]; /* storage for the two per-backend arrays described above; presumably the file-level OldestMemberMXactId / OldestVisibleMXactId pointers are aimed into this area during shared-memory setup -- confirm */
208 : : } MultiXactStateData;
209 : :
210 : : /*
211 : : * Size of OldestMemberMXactId and OldestVisibleMXactId arrays.
212 : : */
213 : : #define MaxOldestSlot (MaxBackends + max_prepared_xacts)
214 : :
215 : : /* Pointers to the state data in shared memory */
216 : : static MultiXactStateData *MultiXactState;
217 : : static MultiXactId *OldestMemberMXactId;
218 : : static MultiXactId *OldestVisibleMXactId;
219 : :
220 : :
221 : : /*
222 : : * Definitions for the backend-local MultiXactId cache.
223 : : *
224 : : * We use this cache to store known MultiXacts, so we don't need to go to
225 : : * SLRU areas every time.
226 : : *
227 : : * The cache lasts for the duration of a single transaction, the rationale
228 : : * for this being that most entries will contain our own TransactionId and
229 : : * so they will be uninteresting by the time our next transaction starts.
230 : : * (XXX not clear that this is correct --- other members of the MultiXact
231 : : * could hang around longer than we did. However, it's not clear what a
232 : : * better policy for flushing old cache entries would be.) FIXME actually
233 : : * this is plain wrong now that multixacts may contain update Xids.
234 : : *
235 : : * We allocate the cache entries in a memory context that is deleted at
236 : : * transaction end, so we don't need to do retail freeing of entries.
237 : : */
238 : : typedef struct mXactCacheEnt
239 : : {
240 : : MultiXactId multi; /* the MultiXactId this cache entry describes */
241 : : int nmembers; /* number of valid elements in members[] */
242 : : dlist_node node; /* list linkage; presumably chains the entry into MXactCache -- confirm */
243 : : MultiXactMember members[FLEXIBLE_ARRAY_MEMBER]; /* member array, allocated inline after the fixed fields */
244 : : } mXactCacheEnt;
245 : :
246 : : #define MAX_CACHE_ENTRIES 256
247 : : static dclist_head MXactCache = DCLIST_STATIC_INIT(MXactCache);
248 : : static MemoryContext MXactContext = NULL;
249 : :
250 : : #ifdef MULTIXACT_DEBUG
251 : : #define debug_elog2(a,b) elog(a,b)
252 : : #define debug_elog3(a,b,c) elog(a,b,c)
253 : : #define debug_elog4(a,b,c,d) elog(a,b,c,d)
254 : : #define debug_elog5(a,b,c,d,e) elog(a,b,c,d,e)
255 : : #define debug_elog6(a,b,c,d,e,f) elog(a,b,c,d,e,f)
256 : : #else
257 : : #define debug_elog2(a,b)
258 : : #define debug_elog3(a,b,c)
259 : : #define debug_elog4(a,b,c,d)
260 : : #define debug_elog5(a,b,c,d,e)
261 : : #define debug_elog6(a,b,c,d,e,f)
262 : : #endif
263 : :
264 : : /* internal MultiXactId management */
265 : : static void MultiXactIdSetOldestVisible(void);
266 : : static void RecordNewMultiXact(MultiXactId multi, MultiXactOffset offset,
267 : : int nmembers, MultiXactMember *members);
268 : : static MultiXactId GetNewMultiXactId(int nmembers, MultiXactOffset *offset);
269 : :
270 : : /* MultiXact cache management */
271 : : static int mxactMemberComparator(const void *arg1, const void *arg2);
272 : : static MultiXactId mXactCacheGetBySet(int nmembers, MultiXactMember *members);
273 : : static int mXactCacheGetById(MultiXactId multi, MultiXactMember **members);
274 : : static void mXactCachePut(MultiXactId multi, int nmembers,
275 : : MultiXactMember *members);
276 : :
277 : : /* management of SLRU infrastructure */
278 : : static bool MultiXactOffsetPagePrecedes(int64 page1, int64 page2);
279 : : static bool MultiXactMemberPagePrecedes(int64 page1, int64 page2);
280 : : static void ExtendMultiXactOffset(MultiXactId multi);
281 : : static void ExtendMultiXactMember(MultiXactOffset offset, int nmembers);
282 : : static void SetOldestOffset(void);
283 : : static bool find_multixact_start(MultiXactId multi, MultiXactOffset *result);
284 : : static void WriteMTruncateXlogRec(Oid oldestMultiDB,
285 : : MultiXactId startTruncOff,
286 : : MultiXactId endTruncOff,
287 : : MultiXactOffset startTruncMemb,
288 : : MultiXactOffset endTruncMemb);
289 : :
290 : :
291 : : /*
292 : : * MultiXactIdCreate
293 : : * Construct a MultiXactId representing two TransactionIds.
294 : : *
295 : : * The two XIDs must be different, or be requesting different statuses.
296 : : *
297 : : * NB - we don't worry about our local MultiXactId cache here, because that
298 : : * is handled by the lower-level routines.
299 : : */
300 : : MultiXactId
4712 alvherre@alvh.no-ip. 301 :CBC 1106 : MultiXactIdCreate(TransactionId xid1, MultiXactStatus status1,
302 : : TransactionId xid2, MultiXactStatus status2)
303 : : {
304 : : MultiXactId newMulti;
305 : : MultiXactMember members[2];
306 : :
1147 peter@eisentraut.org 307 [ - + ]: 1106 : Assert(TransactionIdIsValid(xid1));
308 [ - + ]: 1106 : Assert(TransactionIdIsValid(xid2));
309 : :
4712 alvherre@alvh.no-ip. 310 [ + + - + ]: 1106 : Assert(!TransactionIdEquals(xid1, xid2) || (status1 != status2));
311 : :
312 : : /* MultiXactIdSetOldestMember() must have been called already. */
655 heikki.linnakangas@i 313 [ - + ]: 1106 : Assert(MultiXactIdIsValid(OldestMemberMXactId[MyProcNumber]));
314 : :
315 : : /*
316 : : * Note: unlike MultiXactIdExpand, we don't bother to check that both XIDs
317 : : * are still running. In typical usage, xid2 will be our own XID and the
318 : : * caller just did a check on xid1, so it'd be wasted effort.
319 : : */
320 : :
4712 alvherre@alvh.no-ip. 321 : 1106 : members[0].xid = xid1;
322 : 1106 : members[0].status = status1;
323 : 1106 : members[1].xid = xid2;
324 : 1106 : members[1].status = status2;
325 : :
4385 326 : 1106 : newMulti = MultiXactIdCreateFromMembers(2, members);
327 : :
328 : : debug_elog3(DEBUG2, "Create: %s",
329 : : mxid_to_string(newMulti, 2, members));
330 : :
7534 tgl@sss.pgh.pa.us 331 : 1106 : return newMulti;
332 : : }
333 : :
334 : : /*
335 : : * MultiXactIdExpand
336 : : * Add a TransactionId to a pre-existing MultiXactId.
337 : : *
338 : : * If the TransactionId is already a member of the passed MultiXactId with the
339 : : * same status, just return it as-is.
340 : : *
341 : : * Note that we do NOT actually modify the membership of a pre-existing
342 : : * MultiXactId; instead we create a new one. This is necessary to avoid
343 : : * a race condition against code trying to wait for one MultiXactId to finish;
344 : : * see notes in heapam.c.
345 : : *
346 : : * NB - we don't worry about our local MultiXactId cache here, because that
347 : : * is handled by the lower-level routines.
348 : : *
349 : : * Note: It is critical that MultiXactIds that come from an old cluster (i.e.
350 : : * one upgraded by pg_upgrade from a cluster older than this feature) are not
351 : : * passed in.
352 : : */
353 : : MultiXactId
4712 alvherre@alvh.no-ip. 354 : 75543 : MultiXactIdExpand(MultiXactId multi, TransactionId xid, MultiXactStatus status)
355 : : {
356 : : MultiXactId newMulti;
357 : : MultiXactMember *members;
358 : : MultiXactMember *newMembers;
359 : : int nmembers;
360 : : int i;
361 : : int j;
362 : :
1147 peter@eisentraut.org 363 [ - + ]: 75543 : Assert(MultiXactIdIsValid(multi));
364 [ - + ]: 75543 : Assert(TransactionIdIsValid(xid));
365 : :
366 : : /* MultiXactIdSetOldestMember() must have been called already. */
655 heikki.linnakangas@i 367 [ - + ]: 75543 : Assert(MultiXactIdIsValid(OldestMemberMXactId[MyProcNumber]));
368 : :
369 : : debug_elog5(DEBUG2, "Expand: received multi %u, xid %u status %s",
370 : : multi, xid, mxstatus_to_string(status));
371 : :
372 : : /*
373 : : * Note: we don't allow for old multis here. The reason is that the only
374 : : * caller of this function does a check that the multixact is no longer
375 : : * running.
376 : : */
4160 alvherre@alvh.no-ip. 377 : 75543 : nmembers = GetMultiXactIdMembers(multi, &members, false, false);
378 : :
7539 tgl@sss.pgh.pa.us 379 [ - + ]: 75543 : if (nmembers < 0)
380 : : {
381 : : MultiXactMember member;
382 : :
383 : : /*
384 : : * The MultiXactId is obsolete. This can only happen if all the
385 : : * MultiXactId members stop running between the caller checking and
386 : : * passing it to us. It would be better to return that fact to the
387 : : * caller, but it would complicate the API and it's unlikely to happen
388 : : * too often, so just deal with it by creating a singleton MultiXact.
389 : : */
4712 alvherre@alvh.no-ip. 390 :UBC 0 : member.xid = xid;
391 : 0 : member.status = status;
4385 392 : 0 : newMulti = MultiXactIdCreateFromMembers(1, &member);
393 : :
394 : : debug_elog4(DEBUG2, "Expand: %u has no members, create singleton %u",
395 : : multi, newMulti);
7539 tgl@sss.pgh.pa.us 396 : 0 : return newMulti;
397 : : }
398 : :
399 : : /*
400 : : * If the TransactionId is already a member of the MultiXactId with the
401 : : * same status, just return the existing MultiXactId.
402 : : */
7539 tgl@sss.pgh.pa.us 403 [ + + ]:CBC 1465954 : for (i = 0; i < nmembers; i++)
404 : : {
4712 alvherre@alvh.no-ip. 405 [ + + ]: 1390411 : if (TransactionIdEquals(members[i].xid, xid) &&
406 [ - + ]: 54 : (members[i].status == status))
407 : : {
408 : : debug_elog4(DEBUG2, "Expand: %u is already a member of %u",
409 : : xid, multi);
7530 tgl@sss.pgh.pa.us 410 :UBC 0 : pfree(members);
7539 411 : 0 : return multi;
412 : : }
413 : : }
414 : :
415 : : /*
416 : : * Determine which of the members of the MultiXactId are still of
417 : : * interest. This is any running transaction, and also any transaction
418 : : * that grabbed something stronger than just a lock and was committed. (An
419 : : * update that aborted is of no interest here; and having more than one
420 : : * update Xid in a multixact would cause errors elsewhere.)
421 : : *
422 : : * Removing dead members is not just an optimization: freezing of tuples
423 : : * whose Xmax are multis depends on this behavior.
424 : : *
425 : : * Note we have the same race condition here as above: j could be 0 at the
426 : : * end of the loop.
427 : : */
8 michael@paquier.xyz 428 :GNC 75543 : newMembers = palloc_array(MultiXactMember, nmembers + 1);
429 : :
7539 tgl@sss.pgh.pa.us 430 [ + + ]:CBC 1465954 : for (i = 0, j = 0; i < nmembers; i++)
431 : : {
4712 alvherre@alvh.no-ip. 432 [ + + ]: 1390411 : if (TransactionIdIsInProgress(members[i].xid) ||
4256 433 [ + + - + ]: 74684 : (ISUPDATE_from_mxstatus(members[i].status) &&
4712 434 : 17 : TransactionIdDidCommit(members[i].xid)))
435 : : {
436 : 1315744 : newMembers[j].xid = members[i].xid;
437 : 1315744 : newMembers[j++].status = members[i].status;
438 : : }
439 : : }
440 : :
441 : 75543 : newMembers[j].xid = xid;
442 : 75543 : newMembers[j++].status = status;
4385 443 : 75543 : newMulti = MultiXactIdCreateFromMembers(j, newMembers);
444 : :
7539 tgl@sss.pgh.pa.us 445 : 75543 : pfree(members);
446 : 75543 : pfree(newMembers);
447 : :
448 : : debug_elog3(DEBUG2, "Expand: returning new multi %u", newMulti);
449 : :
450 : 75543 : return newMulti;
451 : : }
452 : :
453 : : /*
454 : : * MultiXactIdIsRunning
455 : : * Returns whether a MultiXactId is "running".
456 : : *
457 : : * We return true if at least one member of the given MultiXactId is still
458 : : * running. Note that a "false" result is certain not to change,
459 : : * because it is not legal to add members to an existing MultiXactId.
460 : : *
461 : : * Caller is expected to have verified that the multixact does not come from
462 : : * a pg_upgraded share-locked tuple.
463 : : */
464 : : bool
4160 alvherre@alvh.no-ip. 465 : 149843 : MultiXactIdIsRunning(MultiXactId multi, bool isLockOnly)
466 : : {
467 : : MultiXactMember *members;
468 : : int nmembers;
469 : : int i;
470 : :
471 : : debug_elog3(DEBUG2, "IsRunning %u?", multi);
472 : :
473 : : /*
474 : : * "false" here means we assume our callers have checked that the given
475 : : * multi cannot possibly come from a pg_upgraded database.
476 : : */
477 : 149843 : nmembers = GetMultiXactIdMembers(multi, &members, false, isLockOnly);
478 : :
3905 479 [ + + ]: 149843 : if (nmembers <= 0)
480 : : {
481 : : debug_elog2(DEBUG2, "IsRunning: no members");
7539 tgl@sss.pgh.pa.us 482 : 729 : return false;
483 : : }
484 : :
485 : : /*
486 : : * Checking for myself is cheap compared to looking in shared memory;
487 : : * return true if any live subtransaction of the current top-level
488 : : * transaction is a member.
489 : : *
490 : : * This is not needed for correctness, it's just a fast path.
491 : : */
492 [ + + ]: 2891517 : for (i = 0; i < nmembers; i++)
493 : : {
4712 alvherre@alvh.no-ip. 494 [ + + ]: 2742559 : if (TransactionIdIsCurrentTransactionId(members[i].xid))
495 : : {
496 : : debug_elog3(DEBUG2, "IsRunning: I (%d) am running!", i);
7530 tgl@sss.pgh.pa.us 497 : 156 : pfree(members);
7539 498 : 156 : return true;
499 : : }
500 : : }
501 : :
502 : : /*
503 : : * This could be made faster by having another entry point in procarray.c,
504 : : * walking the PGPROC array only once for all the members. But in most
505 : : * cases nmembers should be small enough that it doesn't much matter.
506 : : */
507 [ + + ]: 296156 : for (i = 0; i < nmembers; i++)
508 : : {
4712 alvherre@alvh.no-ip. 509 [ + + ]: 296116 : if (TransactionIdIsInProgress(members[i].xid))
510 : : {
511 : : debug_elog4(DEBUG2, "IsRunning: member %d (%u) is running",
512 : : i, members[i].xid);
7530 tgl@sss.pgh.pa.us 513 : 148918 : pfree(members);
7539 514 : 148918 : return true;
515 : : }
516 : : }
517 : :
518 : 40 : pfree(members);
519 : :
520 : : debug_elog3(DEBUG2, "IsRunning: %u is not running", multi);
521 : :
522 : 40 : return false;
523 : : }
524 : :
525 : : /*
526 : : * MultiXactIdSetOldestMember
527 : : * Save the oldest MultiXactId this transaction could be a member of.
528 : : *
529 : : * We set the OldestMemberMXactId for a given transaction the first time it's
530 : : * going to do some operation that might require a MultiXactId (tuple lock,
531 : : * update or delete). We need to do this even if we end up using a
532 : : * TransactionId instead of a MultiXactId, because there is a chance that
533 : : * another transaction would add our XID to a MultiXactId.
534 : : *
535 : : * The value to set is the next-to-be-assigned MultiXactId, so this is meant to
536 : : * be called just before doing any such possibly-MultiXactId-able operation.
537 : : */
538 : : void
539 : 1921087 : MultiXactIdSetOldestMember(void)
540 : : {
655 heikki.linnakangas@i 541 [ + + ]: 1921087 : if (!MultiXactIdIsValid(OldestMemberMXactId[MyProcNumber]))
542 : : {
543 : : MultiXactId nextMXact;
544 : :
545 : : /*
546 : : * You might think we don't need to acquire a lock here, since
547 : : * fetching and storing of TransactionIds is probably atomic, but in
548 : : * fact we do: suppose we pick up nextMXact and then lose the CPU for
549 : : * a long time. Someone else could advance nextMXact, and then
550 : : * another someone else could compute an OldestVisibleMXactId that
551 : : * would be after the value we are going to store when we get control
552 : : * back. Which would be wrong.
553 : : *
554 : : * Note that a shared lock is sufficient, because it's enough to stop
555 : : * someone from advancing nextMXact; and nobody else could be trying
556 : : * to write to our OldestMember entry, only reading (and we assume
557 : : * storing it is atomic.)
558 : : */
4368 alvherre@alvh.no-ip. 559 : 74626 : LWLockAcquire(MultiXactGenLock, LW_SHARED);
560 : :
7539 tgl@sss.pgh.pa.us 561 : 74626 : nextMXact = MultiXactState->nextMXact;
562 : :
655 heikki.linnakangas@i 563 : 74626 : OldestMemberMXactId[MyProcNumber] = nextMXact;
564 : :
7539 tgl@sss.pgh.pa.us 565 : 74626 : LWLockRelease(MultiXactGenLock);
566 : :
567 : : debug_elog4(DEBUG2, "MultiXact: setting OldestMember[%d] = %u",
568 : : MyProcNumber, nextMXact);
569 : : }
570 : 1921087 : }
571 : :
572 : : /*
573 : : * MultiXactIdSetOldestVisible
574 : : * Save the oldest MultiXactId this transaction considers possibly live.
575 : : *
576 : : * We set the OldestVisibleMXactId for a given transaction the first time
577 : : * it's going to inspect any MultiXactId. Once we have set this, we are
578 : : * guaranteed that SLRU data for MultiXactIds >= our own OldestVisibleMXactId
579 : : * won't be truncated away.
580 : : *
581 : : * The value to set is the oldest of nextMXact and all the valid per-backend
582 : : * OldestMemberMXactId[] entries. Because of the locking we do, we can be
583 : : * certain that no subsequent call to MultiXactIdSetOldestMember can set
584 : : * an OldestMemberMXactId[] entry older than what we compute here. Therefore
585 : : * there is no live transaction, now or later, that can be a member of any
586 : : * MultiXactId older than the OldestVisibleMXactId we compute here.
587 : : */
588 : : static void
589 : 92471 : MultiXactIdSetOldestVisible(void)
590 : : {
655 heikki.linnakangas@i 591 [ + + ]: 92471 : if (!MultiXactIdIsValid(OldestVisibleMXactId[MyProcNumber]))
592 : : {
593 : : MultiXactId oldestMXact;
594 : : int i;
595 : :
7539 tgl@sss.pgh.pa.us 596 : 3211 : LWLockAcquire(MultiXactGenLock, LW_EXCLUSIVE);
597 : :
598 : 3211 : oldestMXact = MultiXactState->nextMXact;
655 heikki.linnakangas@i 599 [ + + ]: 409777 : for (i = 0; i < MaxOldestSlot; i++)
600 : : {
7539 tgl@sss.pgh.pa.us 601 : 406566 : MultiXactId thisoldest = OldestMemberMXactId[i];
602 : :
603 [ + + + + ]: 462502 : if (MultiXactIdIsValid(thisoldest) &&
604 : 55936 : MultiXactIdPrecedes(thisoldest, oldestMXact))
605 : 5676 : oldestMXact = thisoldest;
606 : : }
607 : :
655 heikki.linnakangas@i 608 : 3211 : OldestVisibleMXactId[MyProcNumber] = oldestMXact;
609 : :
7539 tgl@sss.pgh.pa.us 610 : 3211 : LWLockRelease(MultiXactGenLock);
611 : :
612 : : debug_elog4(DEBUG2, "MultiXact: setting OldestVisible[%d] = %u",
613 : : MyProcNumber, oldestMXact);
614 : : }
615 : 92471 : }
616 : :
617 : : /*
618 : : * ReadNextMultiXactId
619 : : * Return the next MultiXactId to be assigned, but don't allocate it
620 : : */
621 : : MultiXactId
4712 alvherre@alvh.no-ip. 622 : 31169 : ReadNextMultiXactId(void)
623 : : {
624 : : MultiXactId mxid;
625 : :
626 : : /* XXX we could presumably do this without a lock. */
627 : 31169 : LWLockAcquire(MultiXactGenLock, LW_SHARED);
628 : 31169 : mxid = MultiXactState->nextMXact;
629 : 31169 : LWLockRelease(MultiXactGenLock);
630 : :
631 : 31169 : return mxid;
632 : : }
633 : :
634 : : /*
635 : : * ReadMultiXactIdRange
636 : : * Get the range of IDs that may still be referenced by a relation.
637 : : */
638 : : void
1883 rhaas@postgresql.org 639 : 1477 : ReadMultiXactIdRange(MultiXactId *oldest, MultiXactId *next)
640 : : {
641 : 1477 : LWLockAcquire(MultiXactGenLock, LW_SHARED);
642 : 1477 : *oldest = MultiXactState->oldestMultiXactId;
643 : 1477 : *next = MultiXactState->nextMXact;
644 : 1477 : LWLockRelease(MultiXactGenLock);
645 : 1477 : }
646 : :
647 : :
648 : : /*
649 : : * MultiXactIdCreateFromMembers
650 : : * Make a new MultiXactId from the specified set of members
651 : : *
652 : : * Make XLOG, SLRU and cache entries for a new MultiXactId, recording the
653 : : * given TransactionIds as members. Returns the newly created MultiXactId.
654 : : *
655 : : * NB: the passed members[] array will be sorted in-place.
656 : : */
657 : : MultiXactId
4385 alvherre@alvh.no-ip. 658 : 76650 : MultiXactIdCreateFromMembers(int nmembers, MultiXactMember *members)
659 : : {
660 : : MultiXactId multi;
661 : : MultiXactOffset offset;
662 : : xl_multixact_create xlrec;
663 : :
664 : : debug_elog3(DEBUG2, "Create: %s",
665 : : mxid_to_string(InvalidMultiXactId, nmembers, members));
666 : :
667 : : /*
668 : : * See if the same set of members already exists in our cache; if so, just
669 : : * re-use that MultiXactId. (Note: it might seem that looking in our
670 : : * cache is insufficient, and we ought to search disk to see if a
671 : : * duplicate definition already exists. But since we only ever create
672 : : * MultiXacts containing our own XID, in most cases any such MultiXacts
673 : : * were in fact created by us, and so will be in our cache. There are
674 : : * corner cases where someone else added us to a MultiXact without our
675 : : * knowledge, but it's not worth checking for.)
676 : : */
4712 677 : 76650 : multi = mXactCacheGetBySet(nmembers, members);
7539 tgl@sss.pgh.pa.us 678 [ + + ]: 76650 : if (MultiXactIdIsValid(multi))
679 : : {
680 : : debug_elog2(DEBUG2, "Create: in cache!");
681 : 71341 : return multi;
682 : : }
683 : :
684 : : /* Verify that there is a single update Xid among the given members. */
685 : : {
686 : : int i;
4256 alvherre@alvh.no-ip. 687 : 5309 : bool has_update = false;
688 : :
689 [ + + ]: 100053 : for (i = 0; i < nmembers; i++)
690 : : {
691 [ + + ]: 94744 : if (ISUPDATE_from_mxstatus(members[i].status))
692 : : {
693 [ - + ]: 2373 : if (has_update)
1120 alvherre@alvh.no-ip. 694 [ # # ]:UBC 0 : elog(ERROR, "new multixact has more than one updating member: %s",
695 : : mxid_to_string(InvalidMultiXactId, nmembers, members));
4256 alvherre@alvh.no-ip. 696 :CBC 2373 : has_update = true;
697 : : }
698 : : }
699 : : }
700 : :
701 : : /* Load the injection point before entering the critical section */
702 : : INJECTION_POINT_LOAD("multixact-create-from-members");
703 : :
704 : : /*
705 : : * Assign the MXID and offsets range to use, and make sure there is space
706 : : * in the OFFSETs and MEMBERs files. NB: this routine does
707 : : * START_CRIT_SECTION().
708 : : *
709 : : * Note: unlike MultiXactIdCreate and MultiXactIdExpand, we do not check
710 : : * that we've called MultiXactIdSetOldestMember here. This is because
711 : : * this routine is used in some places to create new MultiXactIds of which
712 : : * the current backend is not a member, notably during freezing of multis
713 : : * in vacuum. During vacuum, in particular, it would be unacceptable to
714 : : * keep OldestMulti set, in case it runs for long.
715 : : */
4712 716 : 5309 : multi = GetNewMultiXactId(nmembers, &offset);
717 : :
718 : : INJECTION_POINT_CACHED("multixact-create-from-members", NULL);
719 : :
720 : : /* Make an XLOG entry describing the new MXID. */
7498 tgl@sss.pgh.pa.us 721 : 5309 : xlrec.mid = multi;
722 : 5309 : xlrec.moff = offset;
4712 alvherre@alvh.no-ip. 723 : 5309 : xlrec.nmembers = nmembers;
724 : :
725 : : /*
726 : : * XXX Note: there's a lot of padding space in MultiXactMember. We could
727 : : * find a more compact representation of this Xlog record -- perhaps all
728 : : * the status flags in one XLogRecData, then all the xids in another one?
729 : : * Not clear that it's worth the trouble though.
730 : : */
4046 heikki.linnakangas@i 731 : 5309 : XLogBeginInsert();
310 peter@eisentraut.org 732 : 5309 : XLogRegisterData(&xlrec, SizeOfMultiXactCreate);
733 : 5309 : XLogRegisterData(members, nmembers * sizeof(MultiXactMember));
734 : :
4046 heikki.linnakangas@i 735 : 5309 : (void) XLogInsert(RM_MULTIXACT_ID, XLOG_MULTIXACT_CREATE_ID);
736 : :
737 : : /* Now enter the information into the OFFSETs and MEMBERs logs */
4712 alvherre@alvh.no-ip. 738 : 5309 : RecordNewMultiXact(multi, offset, nmembers, members);
739 : :
740 : : /* Done with critical section */
7356 tgl@sss.pgh.pa.us 741 [ - + ]: 5309 : END_CRIT_SECTION();
742 : :
743 : : /* Store the new MultiXactId in the local cache, too */
4712 alvherre@alvh.no-ip. 744 : 5309 : mXactCachePut(multi, nmembers, members);
745 : :
746 : : debug_elog2(DEBUG2, "Create: all done");
747 : :
7498 tgl@sss.pgh.pa.us 748 : 5309 : return multi;
749 : : }
750 : :
751 : : /*
752 : : * RecordNewMultiXact
753 : : * Write info about a new multixact into the offsets and members files
754 : : *
755 : : * This is broken out of MultiXactIdCreateFromMembers so that xlog replay can
756 : : * use it.
757 : : */
758 : : static void
759 : 5311 : RecordNewMultiXact(MultiXactId multi, MultiXactOffset offset,
760 : : int nmembers, MultiXactMember *members)
761 : : {
762 : : int64 pageno;
763 : : int64 prev_pageno;
764 : : int entryno;
765 : : int slotno;
766 : : MultiXactOffset *offptr;
767 : : MultiXactId next;
768 : : int64 next_pageno;
769 : : int next_entryno;
770 : : MultiXactOffset *next_offptr;
771 : : MultiXactOffset next_offset;
772 : : LWLock *lock;
659 alvherre@alvh.no-ip. 773 : 5311 : LWLock *prevlock = NULL;
774 : :
775 : : /* position of this multixid in the offsets SLRU area */
7539 tgl@sss.pgh.pa.us 776 : 5311 : pageno = MultiXactIdToOffsetPage(multi);
777 : 5311 : entryno = MultiXactIdToOffsetEntry(multi);
778 : :
779 : : /* position of the next multixid */
6 heikki.linnakangas@i 780 :GNC 5311 : next = NextMultiXactId(multi);
15 heikki.linnakangas@i 781 :CBC 5311 : next_pageno = MultiXactIdToOffsetPage(next);
782 : 5311 : next_entryno = MultiXactIdToOffsetEntry(next);
783 : :
784 : : /*
785 : : * Set the starting offset of this multixid's members.
786 : : *
787 : : * In the common case, it was already be set by the previous
788 : : * RecordNewMultiXact call, as this was the next multixid of the previous
789 : : * multixid. But if multiple backends are generating multixids
790 : : * concurrently, we might race ahead and get called before the previous
791 : : * multixid.
792 : : */
659 alvherre@alvh.no-ip. 793 : 5311 : lock = SimpleLruGetBankLock(MultiXactOffsetCtl, pageno);
794 : 5311 : LWLockAcquire(lock, LW_EXCLUSIVE);
795 : :
796 : : /*
797 : : * Note: we pass the MultiXactId to SimpleLruReadPage as the "transaction"
798 : : * to complain about if there's any I/O error. This is kinda bogus, but
799 : : * since the errors will always give the full pathname, it should be clear
800 : : * enough that a MultiXactId is really involved. Perhaps someday we'll
801 : : * take the trouble to generalize the slru.c error reporting code.
802 : : */
6714 tgl@sss.pgh.pa.us 803 : 5311 : slotno = SimpleLruReadPage(MultiXactOffsetCtl, pageno, true, multi);
7498 804 : 5311 : offptr = (MultiXactOffset *) MultiXactOffsetCtl->shared->page_buffer[slotno];
7539 805 : 5311 : offptr += entryno;
806 : :
15 heikki.linnakangas@i 807 [ - + ]: 5311 : if (*offptr != offset)
808 : : {
809 : : /* should already be set to the correct value, or not at all */
15 heikki.linnakangas@i 810 [ # # ]:LBC (11) : Assert(*offptr == 0);
811 : (11) : *offptr = offset;
812 : (11) : MultiXactOffsetCtl->shared->page_dirty[slotno] = true;
813 : : }
814 : :
815 : : /*
816 : : * Set the next multixid's offset to the end of this multixid's members.
817 : : */
15 heikki.linnakangas@i 818 [ + + ]:CBC 5311 : if (next_pageno == pageno)
819 : : {
820 : 5306 : next_offptr = offptr + 1;
821 : : }
822 : : else
823 : : {
824 : : /* must be the first entry on the page */
15 heikki.linnakangas@i 825 [ + + - + ]:GBC 5 : Assert(next_entryno == 0 || next == FirstMultiXactId);
826 : :
827 : : /* Swap the lock for a lock on the next page */
828 : 5 : LWLockRelease(lock);
829 : 5 : lock = SimpleLruGetBankLock(MultiXactOffsetCtl, next_pageno);
830 : 5 : LWLockAcquire(lock, LW_EXCLUSIVE);
831 : :
832 : 5 : slotno = SimpleLruReadPage(MultiXactOffsetCtl, next_pageno, true, next);
833 : 5 : next_offptr = (MultiXactOffset *) MultiXactOffsetCtl->shared->page_buffer[slotno];
834 : 5 : next_offptr += next_entryno;
835 : : }
836 : :
837 : : /* Like in GetNewMultiXactId(), skip over offset 0 */
13 heikki.linnakangas@i 838 :CBC 5311 : next_offset = offset + nmembers;
839 [ - + ]: 5311 : if (next_offset == 0)
13 heikki.linnakangas@i 840 :UBC 0 : next_offset = 1;
13 heikki.linnakangas@i 841 [ + - ]:CBC 5311 : if (*next_offptr != next_offset)
842 : : {
843 : : /* should already be set to the correct value, or not at all */
15 844 [ - + ]: 5311 : Assert(*next_offptr == 0);
13 845 : 5311 : *next_offptr = next_offset;
15 846 : 5311 : MultiXactOffsetCtl->shared->page_dirty[slotno] = true;
847 : : }
848 : :
849 : : /* Release MultiXactOffset SLRU lock. */
659 alvherre@alvh.no-ip. 850 : 5311 : LWLockRelease(lock);
851 : :
7539 tgl@sss.pgh.pa.us 852 : 5311 : prev_pageno = -1;
853 : :
15 heikki.linnakangas@i 854 [ + + ]: 100059 : for (int i = 0; i < nmembers; i++, offset++)
855 : : {
856 : : TransactionId *memberptr;
857 : : uint32 *flagsptr;
858 : : uint32 flagsval;
859 : : int bshift;
860 : : int flagsoff;
861 : : int memberoff;
862 : :
4712 alvherre@alvh.no-ip. 863 [ - + ]: 94748 : Assert(members[i].status <= MultiXactStatusUpdate);
864 : :
7539 tgl@sss.pgh.pa.us 865 : 94748 : pageno = MXOffsetToMemberPage(offset);
4712 alvherre@alvh.no-ip. 866 : 94748 : memberoff = MXOffsetToMemberOffset(offset);
867 : 94748 : flagsoff = MXOffsetToFlagsOffset(offset);
868 : 94748 : bshift = MXOffsetToFlagsBitShift(offset);
869 : :
7539 tgl@sss.pgh.pa.us 870 [ + + ]: 94748 : if (pageno != prev_pageno)
871 : : {
872 : : /*
873 : : * MultiXactMember SLRU page is changed so check if this new page
874 : : * fall into the different SLRU bank then release the old bank's
875 : : * lock and acquire lock on the new bank.
876 : : */
659 alvherre@alvh.no-ip. 877 : 5365 : lock = SimpleLruGetBankLock(MultiXactMemberCtl, pageno);
878 [ + - ]: 5365 : if (lock != prevlock)
879 : : {
880 [ + + ]: 5365 : if (prevlock != NULL)
659 alvherre@alvh.no-ip. 881 :GBC 54 : LWLockRelease(prevlock);
882 : :
659 alvherre@alvh.no-ip. 883 :CBC 5365 : LWLockAcquire(lock, LW_EXCLUSIVE);
884 : 5365 : prevlock = lock;
885 : : }
6714 tgl@sss.pgh.pa.us 886 : 5365 : slotno = SimpleLruReadPage(MultiXactMemberCtl, pageno, true, multi);
7539 887 : 5365 : prev_pageno = pageno;
888 : : }
889 : :
890 : 94748 : memberptr = (TransactionId *)
4712 alvherre@alvh.no-ip. 891 : 94748 : (MultiXactMemberCtl->shared->page_buffer[slotno] + memberoff);
892 : :
893 : 94748 : *memberptr = members[i].xid;
894 : :
895 : 94748 : flagsptr = (uint32 *)
896 : 94748 : (MultiXactMemberCtl->shared->page_buffer[slotno] + flagsoff);
897 : :
898 : 94748 : flagsval = *flagsptr;
899 : 94748 : flagsval &= ~(((1 << MXACT_MEMBER_BITS_PER_XACT) - 1) << bshift);
900 : 94748 : flagsval |= (members[i].status << bshift);
901 : 94748 : *flagsptr = flagsval;
902 : :
7348 tgl@sss.pgh.pa.us 903 : 94748 : MultiXactMemberCtl->shared->page_dirty[slotno] = true;
904 : : }
905 : :
659 alvherre@alvh.no-ip. 906 [ + - ]: 5311 : if (prevlock != NULL)
907 : 5311 : LWLockRelease(prevlock);
7539 tgl@sss.pgh.pa.us 908 : 5311 : }
909 : :
910 : : /*
911 : : * GetNewMultiXactId
912 : : * Get the next MultiXactId.
913 : : *
914 : : * Also, reserve the needed amount of space in the "members" area. The
915 : : * starting offset of the reserved space is returned in *offset.
916 : : *
917 : : * This may generate XLOG records for expansion of the offsets and/or members
918 : : * files. Unfortunately, we have to do that while holding MultiXactGenLock
919 : : * to avoid race conditions --- the XLOG record for zeroing a page must appear
920 : : * before any backend can possibly try to store data in that page!
921 : : *
922 : : * We start a critical section before advancing the shared counters. The
923 : : * caller must end the critical section after writing SLRU data.
924 : : */
925 : : static MultiXactId
4712 alvherre@alvh.no-ip. 926 : 5309 : GetNewMultiXactId(int nmembers, MultiXactOffset *offset)
927 : : {
928 : : MultiXactId result;
929 : : MultiXactOffset nextOffset;
930 : :
931 : : debug_elog3(DEBUG2, "GetNew: for %d xids", nmembers);
932 : :
933 : : /* safety check, we should never get this far in a HS standby */
934 [ - + ]: 5309 : if (RecoveryInProgress())
4712 alvherre@alvh.no-ip. 935 [ # # ]:UBC 0 : elog(ERROR, "cannot assign MultiXactIds during recovery");
936 : :
7539 tgl@sss.pgh.pa.us 937 :CBC 5309 : LWLockAcquire(MultiXactGenLock, LW_EXCLUSIVE);
938 : :
939 : : /* Assign the MXID */
940 : 5309 : result = MultiXactState->nextMXact;
941 : :
942 : : /*----------
943 : : * Check to see if it's safe to assign another MultiXactId. This protects
944 : : * against catastrophic data loss due to multixact wraparound. The basic
945 : : * rules are:
946 : : *
947 : : * If we're past multiVacLimit or the safe threshold for member storage
948 : : * space, or we don't know what the safe threshold for member storage is,
949 : : * start trying to force autovacuum cycles.
950 : : * If we're past multiWarnLimit, start issuing warnings.
951 : : * If we're past multiStopLimit, refuse to create new MultiXactIds.
952 : : *
953 : : * Note these are pretty much the same protections in GetNewTransactionId.
954 : : *----------
955 : : */
3833 andres@anarazel.de 956 [ - + ]: 5309 : if (!MultiXactIdPrecedes(result, MultiXactState->multiVacLimit))
957 : : {
958 : : /*
959 : : * For safety's sake, we release MultiXactGenLock while sending
960 : : * signals, warnings, etc. This is not so much because we care about
961 : : * preserving concurrency in this situation, as to avoid any
962 : : * possibility of deadlock while doing get_database_name(). First,
963 : : * copy all the shared values we'll need in this path.
964 : : */
4712 alvherre@alvh.no-ip. 965 :UBC 0 : MultiXactId multiWarnLimit = MultiXactState->multiWarnLimit;
966 : 0 : MultiXactId multiStopLimit = MultiXactState->multiStopLimit;
967 : 0 : MultiXactId multiWrapLimit = MultiXactState->multiWrapLimit;
968 : 0 : Oid oldest_datoid = MultiXactState->oldestMultiXactDB;
969 : :
970 : 0 : LWLockRelease(MultiXactGenLock);
971 : :
972 [ # # ]: 0 : if (IsUnderPostmaster &&
973 [ # # ]: 0 : !MultiXactIdPrecedes(result, multiStopLimit))
974 : : {
975 : 0 : char *oldest_datname = get_database_name(oldest_datoid);
976 : :
977 : : /*
978 : : * Immediately kick autovacuum into action as we're already in
979 : : * ERROR territory.
980 : : */
3833 andres@anarazel.de 981 : 0 : SendPostmasterSignal(PMSIGNAL_START_AUTOVAC_LAUNCHER);
982 : :
983 : : /* complain even if that DB has disappeared */
4712 alvherre@alvh.no-ip. 984 [ # # ]: 0 : if (oldest_datname)
985 [ # # ]: 0 : ereport(ERROR,
986 : : (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
987 : : errmsg("database is not accepting commands that assign new MultiXactIds to avoid wraparound data loss in database \"%s\"",
988 : : oldest_datname),
989 : : errhint("Execute a database-wide VACUUM in that database.\n"
990 : : "You might also need to commit or roll back old prepared transactions, or drop stale replication slots.")));
991 : : else
992 [ # # ]: 0 : ereport(ERROR,
993 : : (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
994 : : errmsg("database is not accepting commands that assign new MultiXactIds to avoid wraparound data loss in database with OID %u",
995 : : oldest_datoid),
996 : : errhint("Execute a database-wide VACUUM in that database.\n"
997 : : "You might also need to commit or roll back old prepared transactions, or drop stale replication slots.")));
998 : : }
999 : :
1000 : : /*
1001 : : * To avoid swamping the postmaster with signals, we issue the autovac
1002 : : * request only once per 64K multis generated. This still gives
1003 : : * plenty of chances before we get into real trouble.
1004 : : */
6 heikki.linnakangas@i 1005 [ # # # # :UNC 0 : if (IsUnderPostmaster && ((result % 65536) == 0 || result == FirstMultiXactId))
# # ]
3833 andres@anarazel.de 1006 :UBC 0 : SendPostmasterSignal(PMSIGNAL_START_AUTOVAC_LAUNCHER);
1007 : :
1008 [ # # ]: 0 : if (!MultiXactIdPrecedes(result, multiWarnLimit))
1009 : : {
4712 alvherre@alvh.no-ip. 1010 : 0 : char *oldest_datname = get_database_name(oldest_datoid);
1011 : :
1012 : : /* complain even if that DB has disappeared */
1013 [ # # ]: 0 : if (oldest_datname)
1014 [ # # ]: 0 : ereport(WARNING,
1015 : : (errmsg_plural("database \"%s\" must be vacuumed before %u more MultiXactId is used",
1016 : : "database \"%s\" must be vacuumed before %u more MultiXactIds are used",
1017 : : multiWrapLimit - result,
1018 : : oldest_datname,
1019 : : multiWrapLimit - result),
1020 : : errhint("Execute a database-wide VACUUM in that database.\n"
1021 : : "You might also need to commit or roll back old prepared transactions, or drop stale replication slots.")));
1022 : : else
1023 [ # # ]: 0 : ereport(WARNING,
1024 : : (errmsg_plural("database with OID %u must be vacuumed before %u more MultiXactId is used",
1025 : : "database with OID %u must be vacuumed before %u more MultiXactIds are used",
1026 : : multiWrapLimit - result,
1027 : : oldest_datoid,
1028 : : multiWrapLimit - result),
1029 : : errhint("Execute a database-wide VACUUM in that database.\n"
1030 : : "You might also need to commit or roll back old prepared transactions, or drop stale replication slots.")));
1031 : : }
1032 : :
1033 : : /* Re-acquire lock and start over */
1034 : 0 : LWLockAcquire(MultiXactGenLock, LW_EXCLUSIVE);
1035 : 0 : result = MultiXactState->nextMXact;
1036 : : }
1037 : :
1038 : : /*
1039 : : * Make sure there is room for the next MXID in the file. Assigning this
1040 : : * MXID sets the next MXID's offset already.
1041 : : */
6 heikki.linnakangas@i 1042 :GNC 5309 : ExtendMultiXactOffset(NextMultiXactId(result));
1043 : :
1044 : : /*
1045 : : * Reserve the members space, similarly to above.
1046 : : */
7356 tgl@sss.pgh.pa.us 1047 :CBC 5309 : nextOffset = MultiXactState->nextOffset;
1048 : :
1049 : : /*
1050 : : * Offsets are 64-bit integers and will never wrap around. Firstly, it
1051 : : * would take an unrealistic amount of time and resources to consume 2^64
1052 : : * offsets. Secondly, multixid creation is WAL-logged, so you would run
1053 : : * out of LSNs before reaching offset wraparound. Nevertheless, check for
1054 : : * wraparound as a sanity check.
1055 : : */
9 heikki.linnakangas@i 1056 [ - + ]:GNC 5309 : if (nextOffset + nmembers < nextOffset)
9 heikki.linnakangas@i 1057 [ # # ]:UBC 0 : ereport(ERROR,
1058 : : (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
1059 : : errmsg("MultiXact members would wrap around")));
9 heikki.linnakangas@i 1060 :GNC 5309 : *offset = nextOffset;
1061 : :
4712 alvherre@alvh.no-ip. 1062 :CBC 5309 : ExtendMultiXactMember(nextOffset, nmembers);
1063 : :
1064 : : /*
1065 : : * Critical section from here until caller has written the data into the
1066 : : * just-reserved SLRU space; we don't want to error out with a partly
1067 : : * written MultiXact structure. (In particular, failing to write our
1068 : : * start offset after advancing nextMXact would effectively corrupt the
1069 : : * previous MultiXact.)
1070 : : */
7356 tgl@sss.pgh.pa.us 1071 : 5309 : START_CRIT_SECTION();
1072 : :
1073 : : /*
1074 : : * Advance counters. As in GetNewTransactionId(), this must not happen
1075 : : * until after file extension has succeeded!
1076 : : */
6 heikki.linnakangas@i 1077 :GNC 5309 : MultiXactState->nextMXact = NextMultiXactId(result);
4712 alvherre@alvh.no-ip. 1078 :CBC 5309 : MultiXactState->nextOffset += nmembers;
1079 : :
7539 tgl@sss.pgh.pa.us 1080 : 5309 : LWLockRelease(MultiXactGenLock);
1081 : :
1082 : : debug_elog4(DEBUG2, "GetNew: returning %u offset %" PRIu64,
1083 : : result, *offset);
1084 : 5309 : return result;
1085 : : }
1086 : :
1087 : : /*
1088 : : * GetMultiXactIdMembers
1089 : : * Return the set of MultiXactMembers that make up a MultiXactId
1090 : : *
1091 : : * Return value is the number of members found, or -1 if there are none,
1092 : : * and *members is set to a newly palloc'ed array of members. It's the
1093 : : * caller's responsibility to free it when done with it.
1094 : : *
1095 : : * from_pgupgrade must be passed as true if and only if only the multixact
1096 : : * corresponds to a value from a tuple that was locked in a 9.2-or-older
1097 : : * installation and later pg_upgrade'd (that is, the infomask is
1098 : : * HEAP_LOCKED_UPGRADED). In this case, we know for certain that no members
1099 : : * can still be running, so we return -1 just like for an empty multixact
1100 : : * without any further checking. It would be wrong to try to resolve such a
1101 : : * multixact: either the multixact is within the current valid multixact
1102 : : * range, in which case the returned result would be bogus, or outside that
1103 : : * range, in which case an error would be raised.
1104 : : *
1105 : : * In all other cases, the passed multixact must be within the known valid
1106 : : * range, that is, greater than or equal to oldestMultiXactId, and less than
1107 : : * nextMXact. Otherwise, an error is raised.
1108 : : *
1109 : : * isLockOnly must be set to true if caller is certain that the given multi
1110 : : * is used only to lock tuples; can be false without loss of correctness,
1111 : : * but passing a true means we can return quickly without checking for
1112 : : * old updates.
1113 : : */
1114 : : int
4712 alvherre@alvh.no-ip. 1115 : 547825 : GetMultiXactIdMembers(MultiXactId multi, MultiXactMember **members,
1116 : : bool from_pgupgrade, bool isLockOnly)
1117 : : {
1118 : : int64 pageno;
1119 : : int64 prev_pageno;
1120 : : int entryno;
1121 : : int slotno;
1122 : : MultiXactOffset *offptr;
1123 : : MultiXactOffset offset;
1124 : : MultiXactOffset nextMXOffset;
1125 : : int length;
1126 : : MultiXactId oldestMXact;
1127 : : MultiXactId nextMXact;
1128 : : MultiXactMember *ptr;
1129 : : LWLock *lock;
1130 : :
1131 : : debug_elog3(DEBUG2, "GetMembers: asked for %u", multi);
1132 : :
3464 1133 [ + - - + ]: 547825 : if (!MultiXactIdIsValid(multi) || from_pgupgrade)
1134 : : {
1645 heikki.linnakangas@i 1135 :UBC 0 : *members = NULL;
4403 alvherre@alvh.no-ip. 1136 : 0 : return -1;
1137 : : }
1138 : :
1139 : : /* See if the MultiXactId is in the local cache */
4712 alvherre@alvh.no-ip. 1140 :CBC 547825 : length = mXactCacheGetById(multi, members);
7539 tgl@sss.pgh.pa.us 1141 [ + + ]: 547825 : if (length >= 0)
1142 : : {
1143 : : debug_elog3(DEBUG2, "GetMembers: found %s in the cache",
1144 : : mxid_to_string(multi, length, *members));
1145 : 455354 : return length;
1146 : : }
1147 : :
1148 : : /* Set our OldestVisibleMXactId[] entry if we didn't already */
1149 : 92471 : MultiXactIdSetOldestVisible();
1150 : :
1151 : : /*
1152 : : * If we know the multi is used only for locking and not for updates, then
1153 : : * we can skip checking if the value is older than our oldest visible
1154 : : * multi. It cannot possibly still be running.
1155 : : */
1186 pg@bowt.ie 1156 [ + + + + ]: 96173 : if (isLockOnly &&
655 heikki.linnakangas@i 1157 : 3702 : MultiXactIdPrecedes(multi, OldestVisibleMXactId[MyProcNumber]))
1158 : : {
1159 : : debug_elog2(DEBUG2, "GetMembers: a locker-only multi is too old");
4160 alvherre@alvh.no-ip. 1160 : 730 : *members = NULL;
1161 : 730 : return -1;
1162 : : }
1163 : :
1164 : : /*
1165 : : * We check known limits on MultiXact before resorting to the SLRU area.
1166 : : *
1167 : : * An ID older than MultiXactState->oldestMultiXactId cannot possibly be
1168 : : * useful; it has already been removed, or will be removed shortly, by
1169 : : * truncation. If one is passed, an error is raised.
1170 : : *
1171 : : * Also, an ID >= nextMXact shouldn't ever be seen here; if it is seen, it
1172 : : * implies undetected ID wraparound has occurred. This raises a hard
1173 : : * error.
1174 : : *
1175 : : * Shared lock is enough here since we aren't modifying any global state.
1176 : : * Acquire it just long enough to grab the current counter values.
1177 : : */
7539 tgl@sss.pgh.pa.us 1178 : 91741 : LWLockAcquire(MultiXactGenLock, LW_SHARED);
1179 : :
4712 alvherre@alvh.no-ip. 1180 : 91741 : oldestMXact = MultiXactState->oldestMultiXactId;
7356 tgl@sss.pgh.pa.us 1181 : 91741 : nextMXact = MultiXactState->nextMXact;
1182 : :
1183 : 91741 : LWLockRelease(MultiXactGenLock);
1184 : :
4712 alvherre@alvh.no-ip. 1185 [ - + ]: 91741 : if (MultiXactIdPrecedes(multi, oldestMXact))
3464 alvherre@alvh.no-ip. 1186 [ # # ]:UBC 0 : ereport(ERROR,
1187 : : (errcode(ERRCODE_INTERNAL_ERROR),
1188 : : errmsg("MultiXactId %u does no longer exist -- apparent wraparound",
1189 : : multi)));
1190 : :
4712 alvherre@alvh.no-ip. 1191 [ - + ]:CBC 91741 : if (!MultiXactIdPrecedes(multi, nextMXact))
4712 alvherre@alvh.no-ip. 1192 [ # # ]:UBC 0 : ereport(ERROR,
1193 : : (errcode(ERRCODE_INTERNAL_ERROR),
1194 : : errmsg("MultiXactId %u has not been created yet -- apparent wraparound",
1195 : : multi)));
1196 : :
1197 : : /*
1198 : : * Find out the offset at which we need to start reading MultiXactMembers
1199 : : * and the number of members in the multixact. We determine the latter as
1200 : : * the difference between this multixact's starting offset and the next
1201 : : * one's.
1202 : : */
7539 tgl@sss.pgh.pa.us 1203 :CBC 91741 : pageno = MultiXactIdToOffsetPage(multi);
1204 : 91741 : entryno = MultiXactIdToOffsetEntry(multi);
1205 : :
1206 : : /* Acquire the bank lock for the page we need. */
659 alvherre@alvh.no-ip. 1207 : 91741 : lock = SimpleLruGetBankLock(MultiXactOffsetCtl, pageno);
654 1208 : 91741 : LWLockAcquire(lock, LW_EXCLUSIVE);
1209 : :
1210 : : /* read this multi's offset */
6714 tgl@sss.pgh.pa.us 1211 : 91741 : slotno = SimpleLruReadPage(MultiXactOffsetCtl, pageno, true, multi);
7498 1212 : 91741 : offptr = (MultiXactOffset *) MultiXactOffsetCtl->shared->page_buffer[slotno];
7539 1213 : 91741 : offptr += entryno;
1214 : 91741 : offset = *offptr;
1215 : :
7 heikki.linnakangas@i 1216 [ - + ]:GNC 91741 : if (offset == 0)
7 heikki.linnakangas@i 1217 [ # # ]:UNC 0 : ereport(ERROR,
1218 : : (errcode(ERRCODE_DATA_CORRUPTED),
1219 : : errmsg("MultiXact %u has invalid offset", multi)));
1220 : :
1221 : : /* read next multi's offset */
1222 : : {
1223 : : MultiXactId tmpMXact;
1224 : :
1225 : : /* handle wraparound if needed */
6 heikki.linnakangas@i 1226 :GNC 91741 : tmpMXact = NextMultiXactId(multi);
1227 : :
7539 tgl@sss.pgh.pa.us 1228 :CBC 91741 : prev_pageno = pageno;
1229 : :
1230 : 91741 : pageno = MultiXactIdToOffsetPage(tmpMXact);
1231 : 91741 : entryno = MultiXactIdToOffsetEntry(tmpMXact);
1232 : :
1233 [ + + ]: 91741 : if (pageno != prev_pageno)
1234 : : {
1235 : : LWLock *newlock;
1236 : :
1237 : : /*
1238 : : * Since we're going to access a different SLRU page, if this page
1239 : : * falls under a different bank, release the old bank's lock and
1240 : : * acquire the lock of the new bank.
1241 : : */
654 alvherre@alvh.no-ip. 1242 :GBC 13 : newlock = SimpleLruGetBankLock(MultiXactOffsetCtl, pageno);
1243 [ - + ]: 13 : if (newlock != lock)
1244 : : {
654 alvherre@alvh.no-ip. 1245 :UBC 0 : LWLockRelease(lock);
1246 : 0 : LWLockAcquire(newlock, LW_EXCLUSIVE);
1247 : 0 : lock = newlock;
1248 : : }
6714 tgl@sss.pgh.pa.us 1249 :GBC 13 : slotno = SimpleLruReadPage(MultiXactOffsetCtl, pageno, true, tmpMXact);
1250 : : }
1251 : :
7498 tgl@sss.pgh.pa.us 1252 :CBC 91741 : offptr = (MultiXactOffset *) MultiXactOffsetCtl->shared->page_buffer[slotno];
7539 1253 : 91741 : offptr += entryno;
7356 1254 : 91741 : nextMXOffset = *offptr;
1255 : : }
1256 : :
654 alvherre@alvh.no-ip. 1257 : 91741 : LWLockRelease(lock);
1258 : 91741 : lock = NULL;
1259 : :
1260 : : /* Sanity check the next offset */
7 heikki.linnakangas@i 1261 [ - + ]:GNC 91741 : if (nextMXOffset == 0)
7 heikki.linnakangas@i 1262 [ # # ]:UNC 0 : ereport(ERROR,
1263 : : (errcode(ERRCODE_DATA_CORRUPTED),
1264 : : errmsg("MultiXact %u has invalid next offset", multi)));
3 heikki.linnakangas@i 1265 [ - + ]:GNC 91741 : if (nextMXOffset == offset)
3 heikki.linnakangas@i 1266 [ # # ]:UNC 0 : ereport(ERROR,
1267 : : (errcode(ERRCODE_DATA_CORRUPTED),
1268 : : errmsg("MultiXact %u with offset (%" PRIu64 ") has zero members",
1269 : : multi, offset)));
7 heikki.linnakangas@i 1270 [ - + ]:GNC 91741 : if (nextMXOffset < offset)
7 heikki.linnakangas@i 1271 [ # # ]:UNC 0 : ereport(ERROR,
1272 : : (errcode(ERRCODE_DATA_CORRUPTED),
1273 : : errmsg("MultiXact %u has offset (%" PRIu64 ") greater than its next offset (%" PRIu64 ")",
1274 : : multi, offset, nextMXOffset)));
7 heikki.linnakangas@i 1275 [ - + ]:GNC 91741 : if (nextMXOffset - offset > INT32_MAX)
7 heikki.linnakangas@i 1276 [ # # ]:UNC 0 : ereport(ERROR,
1277 : : (errcode(ERRCODE_DATA_CORRUPTED),
1278 : : errmsg("MultiXact %u has too many members (%" PRIu64 ")",
1279 : : multi, nextMXOffset - offset)));
7 heikki.linnakangas@i 1280 :GNC 91741 : length = nextMXOffset - offset;
1281 : :
1282 : : /* read the members */
4712 alvherre@alvh.no-ip. 1283 : 91741 : ptr = (MultiXactMember *) palloc(length * sizeof(MultiXactMember));
7539 tgl@sss.pgh.pa.us 1284 :CBC 91741 : prev_pageno = -1;
654 alvherre@alvh.no-ip. 1285 [ + + ]: 1827926 : for (int i = 0; i < length; i++, offset++)
1286 : : {
1287 : : TransactionId *xactptr;
1288 : : uint32 *flagsptr;
1289 : : int flagsoff;
1290 : : int bshift;
1291 : : int memberoff;
1292 : :
7539 tgl@sss.pgh.pa.us 1293 : 1736185 : pageno = MXOffsetToMemberPage(offset);
4712 alvherre@alvh.no-ip. 1294 : 1736185 : memberoff = MXOffsetToMemberOffset(offset);
1295 : :
7539 tgl@sss.pgh.pa.us 1296 [ + + ]: 1736185 : if (pageno != prev_pageno)
1297 : : {
1298 : : LWLock *newlock;
1299 : :
1300 : : /*
1301 : : * Since we're going to access a different SLRU page, if this page
1302 : : * falls under a different bank, release the old bank's lock and
1303 : : * acquire the lock of the new bank.
1304 : : */
654 alvherre@alvh.no-ip. 1305 : 91903 : newlock = SimpleLruGetBankLock(MultiXactMemberCtl, pageno);
1306 [ + - ]: 91903 : if (newlock != lock)
1307 : : {
1308 [ + + ]: 91903 : if (lock)
654 alvherre@alvh.no-ip. 1309 :GBC 162 : LWLockRelease(lock);
654 alvherre@alvh.no-ip. 1310 :CBC 91903 : LWLockAcquire(newlock, LW_EXCLUSIVE);
1311 : 91903 : lock = newlock;
1312 : : }
1313 : :
6714 tgl@sss.pgh.pa.us 1314 : 91903 : slotno = SimpleLruReadPage(MultiXactMemberCtl, pageno, true, multi);
7539 1315 : 91903 : prev_pageno = pageno;
1316 : : }
1317 : :
1318 : 1736185 : xactptr = (TransactionId *)
4712 alvherre@alvh.no-ip. 1319 : 1736185 : (MultiXactMemberCtl->shared->page_buffer[slotno] + memberoff);
9 heikki.linnakangas@i 1320 [ - + ]:GNC 1736185 : Assert(TransactionIdIsValid(*xactptr));
1321 : :
4712 alvherre@alvh.no-ip. 1322 :CBC 1736185 : flagsoff = MXOffsetToFlagsOffset(offset);
1323 : 1736185 : bshift = MXOffsetToFlagsBitShift(offset);
1324 : 1736185 : flagsptr = (uint32 *) (MultiXactMemberCtl->shared->page_buffer[slotno] + flagsoff);
1325 : :
9 heikki.linnakangas@i 1326 :GNC 1736185 : ptr[i].xid = *xactptr;
1327 : 1736185 : ptr[i].status = (*flagsptr >> bshift) & MXACT_MEMBER_XACT_BITMASK;
1328 : : }
1329 : :
654 alvherre@alvh.no-ip. 1330 :CBC 91741 : LWLockRelease(lock);
1331 : :
1332 : : /*
1333 : : * Copy the result into the local cache.
1334 : : */
9 heikki.linnakangas@i 1335 :GNC 91741 : mXactCachePut(multi, length, ptr);
1336 : :
1337 : : debug_elog3(DEBUG2, "GetMembers: no cache for %s",
1338 : : mxid_to_string(multi, length, ptr));
1645 heikki.linnakangas@i 1339 :CBC 91741 : *members = ptr;
9 heikki.linnakangas@i 1340 :GNC 91741 : return length;
1341 : : }
1342 : :
1343 : : /*
1344 : : * mxactMemberComparator
1345 : : * qsort comparison function for MultiXactMember
1346 : : *
1347 : : * We can't use wraparound comparison for XIDs because that does not respect
1348 : : * the triangle inequality! Any old sort order will do.
1349 : : */
1350 : : static int
4712 alvherre@alvh.no-ip. 1351 :CBC 3050745 : mxactMemberComparator(const void *arg1, const void *arg2)
1352 : : {
1353 : 3050745 : MultiXactMember member1 = *(const MultiXactMember *) arg1;
1354 : 3050745 : MultiXactMember member2 = *(const MultiXactMember *) arg2;
1355 : :
1356 [ + + ]: 3050745 : if (member1.xid > member2.xid)
1357 : 37 : return 1;
1358 [ + + ]: 3050708 : if (member1.xid < member2.xid)
1359 : 3050494 : return -1;
1360 [ + + ]: 214 : if (member1.status > member2.status)
4712 alvherre@alvh.no-ip. 1361 :GBC 16 : return 1;
4712 alvherre@alvh.no-ip. 1362 [ + - ]:CBC 198 : if (member1.status < member2.status)
1363 : 198 : return -1;
4712 alvherre@alvh.no-ip. 1364 :UBC 0 : return 0;
1365 : : }
1366 : :
1367 : : /*
1368 : : * mXactCacheGetBySet
1369 : : * returns a MultiXactId from the cache based on the set of
1370 : : * TransactionIds that compose it, or InvalidMultiXactId if
1371 : : * none matches.
1372 : : *
1373 : : * This is helpful, for example, if two transactions want to lock a huge
1374 : : * table. By using the cache, the second will use the same MultiXactId
1375 : : * for the majority of tuples, thus keeping MultiXactId usage low (saving
1376 : : * both I/O and wraparound issues).
1377 : : *
1378 : : * NB: the passed members array will be sorted in-place.
1379 : : */
1380 : : static MultiXactId
4712 alvherre@alvh.no-ip. 1381 :CBC 76650 : mXactCacheGetBySet(int nmembers, MultiXactMember *members)
1382 : : {
1383 : : dlist_iter iter;
1384 : :
1385 : : debug_elog3(DEBUG2, "CacheGet: looking for %s",
1386 : : mxid_to_string(InvalidMultiXactId, nmembers, members));
1387 : :
1388 : : /* sort the array so comparison is easy */
1389 : 76650 : qsort(members, nmembers, sizeof(MultiXactMember), mxactMemberComparator);
1390 : :
1142 drowley@postgresql.o 1391 [ + - + + ]: 308350 : dclist_foreach(iter, &MXactCache)
1392 : : {
1393 : 303041 : mXactCacheEnt *entry = dclist_container(mXactCacheEnt, node,
1394 : : iter.cur);
1395 : :
4712 alvherre@alvh.no-ip. 1396 [ + + ]: 303041 : if (entry->nmembers != nmembers)
7539 tgl@sss.pgh.pa.us 1397 : 85343 : continue;
1398 : :
1399 : : /*
1400 : : * We assume the cache entries are sorted, and that the unused bits in
1401 : : * "status" are zeroed.
1402 : : */
4712 alvherre@alvh.no-ip. 1403 [ + + ]: 217698 : if (memcmp(members, entry->members, nmembers * sizeof(MultiXactMember)) == 0)
1404 : : {
1405 : : debug_elog3(DEBUG2, "CacheGet: found %u", entry->multi);
1142 drowley@postgresql.o 1406 : 71341 : dclist_move_head(&MXactCache, iter.cur);
7539 tgl@sss.pgh.pa.us 1407 : 71341 : return entry->multi;
1408 : : }
1409 : : }
1410 : :
1411 : : debug_elog2(DEBUG2, "CacheGet: not found :-(");
1412 : 5309 : return InvalidMultiXactId;
1413 : : }
1414 : :
1415 : : /*
1416 : : * mXactCacheGetById
1417 : : * returns the composing MultiXactMember set from the cache for a
1418 : : * given MultiXactId, if present.
1419 : : *
1420 : : * If successful, *xids is set to the address of a palloc'd copy of the
1421 : : * MultiXactMember set. Return value is number of members, or -1 on failure.
1422 : : */
1423 : : static int
4712 alvherre@alvh.no-ip. 1424 : 547825 : mXactCacheGetById(MultiXactId multi, MultiXactMember **members)
1425 : : {
1426 : : dlist_iter iter;
1427 : :
1428 : : debug_elog3(DEBUG2, "CacheGet: looking for %u", multi);
1429 : :
1142 drowley@postgresql.o 1430 [ + - + + ]: 4903653 : dclist_foreach(iter, &MXactCache)
1431 : : {
1432 : 4811182 : mXactCacheEnt *entry = dclist_container(mXactCacheEnt, node,
1433 : : iter.cur);
1434 : :
7539 tgl@sss.pgh.pa.us 1435 [ + + ]: 4811182 : if (entry->multi == multi)
1436 : : {
1437 : : MultiXactMember *ptr;
1438 : : Size size;
1439 : :
4712 alvherre@alvh.no-ip. 1440 : 455354 : size = sizeof(MultiXactMember) * entry->nmembers;
1441 : 455354 : ptr = (MultiXactMember *) palloc(size);
1442 : :
1443 : 455354 : memcpy(ptr, entry->members, size);
1444 : :
1445 : : debug_elog3(DEBUG2, "CacheGet: found %s",
1446 : : mxid_to_string(multi,
1447 : : entry->nmembers,
1448 : : entry->members));
1449 : :
1450 : : /*
1451 : : * Note we modify the list while not using a modifiable iterator.
1452 : : * This is acceptable only because we exit the iteration
1453 : : * immediately afterwards.
1454 : : */
1142 drowley@postgresql.o 1455 : 455354 : dclist_move_head(&MXactCache, iter.cur);
1456 : :
1645 heikki.linnakangas@i 1457 : 455354 : *members = ptr;
4712 alvherre@alvh.no-ip. 1458 : 455354 : return entry->nmembers;
1459 : : }
1460 : : }
1461 : :
1462 : : debug_elog2(DEBUG2, "CacheGet: not found");
7539 tgl@sss.pgh.pa.us 1463 : 92471 : return -1;
1464 : : }
1465 : :
1466 : : /*
1467 : : * mXactCachePut
1468 : : * Add a new MultiXactId and its composing set into the local cache.
1469 : : */
1470 : : static void
4712 alvherre@alvh.no-ip. 1471 : 97050 : mXactCachePut(MultiXactId multi, int nmembers, MultiXactMember *members)
1472 : : {
1473 : : mXactCacheEnt *entry;
1474 : :
1475 : : debug_elog3(DEBUG2, "CachePut: storing %s",
1476 : : mxid_to_string(multi, nmembers, members));
1477 : :
7539 tgl@sss.pgh.pa.us 1478 [ + + ]: 97050 : if (MXactContext == NULL)
1479 : : {
1480 : : /* The cache only lives as long as the current transaction */
1481 : : debug_elog2(DEBUG2, "CachePut: initializing memory context");
1482 : 3342 : MXactContext = AllocSetContextCreate(TopTransactionContext,
1483 : : "MultiXact cache context",
1484 : : ALLOCSET_SMALL_SIZES);
1485 : : }
1486 : :
1487 : : entry = (mXactCacheEnt *)
1488 : 97050 : MemoryContextAlloc(MXactContext,
4712 alvherre@alvh.no-ip. 1489 : 97050 : offsetof(mXactCacheEnt, members) +
1490 : : nmembers * sizeof(MultiXactMember));
1491 : :
7539 tgl@sss.pgh.pa.us 1492 : 97050 : entry->multi = multi;
4712 alvherre@alvh.no-ip. 1493 : 97050 : entry->nmembers = nmembers;
1494 : 97050 : memcpy(entry->members, members, nmembers * sizeof(MultiXactMember));
1495 : :
1496 : : /* mXactCacheGetBySet assumes the entries are sorted, so sort them */
1497 : 97050 : qsort(entry->members, nmembers, sizeof(MultiXactMember), mxactMemberComparator);
1498 : :
1142 drowley@postgresql.o 1499 : 97050 : dclist_push_head(&MXactCache, &entry->node);
1500 [ + + ]: 97050 : if (dclist_count(&MXactCache) > MAX_CACHE_ENTRIES)
1501 : : {
1502 : : dlist_node *node;
1503 : :
1142 drowley@postgresql.o 1504 :GBC 9478 : node = dclist_tail_node(&MXactCache);
1505 : 9478 : dclist_delete_from(&MXactCache, node);
1506 : :
1507 : 9478 : entry = dclist_container(mXactCacheEnt, node, node);
1508 : : debug_elog3(DEBUG2, "CachePut: pruning cached multi %u",
1509 : : entry->multi);
1510 : :
4388 alvherre@alvh.no-ip. 1511 : 9478 : pfree(entry);
1512 : : }
7539 tgl@sss.pgh.pa.us 1513 :CBC 97050 : }
1514 : :
1515 : : char *
4712 alvherre@alvh.no-ip. 1516 :GBC 188088 : mxstatus_to_string(MultiXactStatus status)
1517 : : {
1518 [ + - - - : 188088 : switch (status)
+ - - ]
1519 : : {
1520 : 183634 : case MultiXactStatusForKeyShare:
1521 : 183634 : return "keysh";
4712 alvherre@alvh.no-ip. 1522 :UBC 0 : case MultiXactStatusForShare:
1523 : 0 : return "sh";
1524 : 0 : case MultiXactStatusForNoKeyUpdate:
1525 : 0 : return "fornokeyupd";
1526 : 0 : case MultiXactStatusForUpdate:
1527 : 0 : return "forupd";
4712 alvherre@alvh.no-ip. 1528 :GBC 4454 : case MultiXactStatusNoKeyUpdate:
1529 : 4454 : return "nokeyupd";
4712 alvherre@alvh.no-ip. 1530 :UBC 0 : case MultiXactStatusUpdate:
1531 : 0 : return "upd";
1532 : 0 : default:
1533 [ # # ]: 0 : elog(ERROR, "unrecognized multixact status %d", status);
1534 : : return "";
1535 : : }
1536 : : }
1537 : :
1538 : : char *
1539 : 0 : mxid_to_string(MultiXactId multi, int nmembers, MultiXactMember *members)
1540 : : {
1541 : : static char *str = NULL;
1542 : : StringInfoData buf;
1543 : : int i;
1544 : :
1545 [ # # ]: 0 : if (str != NULL)
1546 : 0 : pfree(str);
1547 : :
1548 : 0 : initStringInfo(&buf);
1549 : :
1550 : 0 : appendStringInfo(&buf, "%u %d[%u (%s)", multi, nmembers, members[0].xid,
1551 : : mxstatus_to_string(members[0].status));
1552 : :
1553 [ # # ]: 0 : for (i = 1; i < nmembers; i++)
1554 : 0 : appendStringInfo(&buf, ", %u (%s)", members[i].xid,
1555 : 0 : mxstatus_to_string(members[i].status));
1556 : :
1557 : 0 : appendStringInfoChar(&buf, ']');
1558 : 0 : str = MemoryContextStrdup(TopMemoryContext, buf.data);
1559 : 0 : pfree(buf.data);
7539 tgl@sss.pgh.pa.us 1560 : 0 : return str;
1561 : : }
1562 : :
1563 : : /*
1564 : : * AtEOXact_MultiXact
1565 : : * Handle transaction end for MultiXact
1566 : : *
1567 : : * This is called at top transaction commit or abort (we don't care which).
1568 : : */
1569 : : void
7539 tgl@sss.pgh.pa.us 1570 :CBC 330132 : AtEOXact_MultiXact(void)
1571 : : {
1572 : : /*
1573 : : * Reset our OldestMemberMXactId and OldestVisibleMXactId values, both of
1574 : : * which should only be valid while within a transaction.
1575 : : *
1576 : : * We assume that storing a MultiXactId is atomic and so we need not take
1577 : : * MultiXactGenLock to do this.
1578 : : */
655 heikki.linnakangas@i 1579 : 330132 : OldestMemberMXactId[MyProcNumber] = InvalidMultiXactId;
1580 : 330132 : OldestVisibleMXactId[MyProcNumber] = InvalidMultiXactId;
1581 : :
1582 : : /*
1583 : : * Discard the local MultiXactId cache. Since MXactContext was created as
1584 : : * a child of TopTransactionContext, we needn't delete it explicitly.
1585 : : */
7539 tgl@sss.pgh.pa.us 1586 : 330132 : MXactContext = NULL;
1142 drowley@postgresql.o 1587 : 330132 : dclist_init(&MXactCache);
7539 tgl@sss.pgh.pa.us 1588 : 330132 : }
1589 : :
1590 : : /*
1591 : : * AtPrepare_MultiXact
1592 : : * Save multixact state at 2PC transaction prepare
1593 : : *
1594 : : * In this phase, we only store our OldestMemberMXactId value in the two-phase
1595 : : * state file.
1596 : : */
1597 : : void
5869 heikki.linnakangas@i 1598 : 288 : AtPrepare_MultiXact(void)
1599 : : {
655 1600 : 288 : MultiXactId myOldestMember = OldestMemberMXactId[MyProcNumber];
1601 : :
5869 1602 [ + + ]: 288 : if (MultiXactIdIsValid(myOldestMember))
1603 : 62 : RegisterTwoPhaseRecord(TWOPHASE_RM_MULTIXACT_ID, 0,
1604 : : &myOldestMember, sizeof(MultiXactId));
1605 : 288 : }
1606 : :
1607 : : /*
1608 : : * PostPrepare_MultiXact
1609 : : * Clean up after successful PREPARE TRANSACTION
1610 : : */
1611 : : void
164 michael@paquier.xyz 1612 :GNC 288 : PostPrepare_MultiXact(FullTransactionId fxid)
1613 : : {
1614 : : MultiXactId myOldestMember;
1615 : :
1616 : : /*
1617 : : * Transfer our OldestMemberMXactId value to the slot reserved for the
1618 : : * prepared transaction.
1619 : : */
655 heikki.linnakangas@i 1620 :CBC 288 : myOldestMember = OldestMemberMXactId[MyProcNumber];
5869 1621 [ + + ]: 288 : if (MultiXactIdIsValid(myOldestMember))
1622 : : {
164 michael@paquier.xyz 1623 :GNC 62 : ProcNumber dummyProcNumber = TwoPhaseGetDummyProcNumber(fxid, false);
1624 : :
1625 : : /*
1626 : : * Even though storing MultiXactId is atomic, acquire lock to make
1627 : : * sure others see both changes, not just the reset of the slot of the
1628 : : * current backend. Using a volatile pointer might suffice, but this
1629 : : * isn't a hot spot.
1630 : : */
5869 heikki.linnakangas@i 1631 :CBC 62 : LWLockAcquire(MultiXactGenLock, LW_EXCLUSIVE);
1632 : :
655 1633 : 62 : OldestMemberMXactId[dummyProcNumber] = myOldestMember;
1634 : 62 : OldestMemberMXactId[MyProcNumber] = InvalidMultiXactId;
1635 : :
5869 1636 : 62 : LWLockRelease(MultiXactGenLock);
1637 : : }
1638 : :
1639 : : /*
1640 : : * We don't need to transfer OldestVisibleMXactId value, because the
1641 : : * transaction is not going to be looking at any more multixacts once it's
1642 : : * prepared.
1643 : : *
1644 : : * We assume that storing a MultiXactId is atomic and so we need not take
1645 : : * MultiXactGenLock to do this.
1646 : : */
655 1647 : 288 : OldestVisibleMXactId[MyProcNumber] = InvalidMultiXactId;
1648 : :
1649 : : /*
1650 : : * Discard the local MultiXactId cache like in AtEOXact_MultiXact.
1651 : : */
5869 1652 : 288 : MXactContext = NULL;
1142 drowley@postgresql.o 1653 : 288 : dclist_init(&MXactCache);
5869 heikki.linnakangas@i 1654 : 288 : }
1655 : :
1656 : : /*
1657 : : * multixact_twophase_recover
1658 : : * Recover the state of a prepared transaction at startup
1659 : : */
1660 : : void
164 michael@paquier.xyz 1661 :GNC 8 : multixact_twophase_recover(FullTransactionId fxid, uint16 info,
1662 : : void *recdata, uint32 len)
1663 : : {
1664 : 8 : ProcNumber dummyProcNumber = TwoPhaseGetDummyProcNumber(fxid, false);
1665 : : MultiXactId oldestMember;
1666 : :
1667 : : /*
1668 : : * Get the oldest member XID from the state file record, and set it in the
1669 : : * OldestMemberMXactId slot reserved for this prepared transaction.
1670 : : */
5869 heikki.linnakangas@i 1671 [ - + ]:CBC 8 : Assert(len == sizeof(MultiXactId));
5774 bruce@momjian.us 1672 : 8 : oldestMember = *((MultiXactId *) recdata);
1673 : :
655 heikki.linnakangas@i 1674 : 8 : OldestMemberMXactId[dummyProcNumber] = oldestMember;
5869 1675 : 8 : }
1676 : :
1677 : : /*
1678 : : * multixact_twophase_postcommit
1679 : : * Similar to AtEOXact_MultiXact but for COMMIT PREPARED
1680 : : */
1681 : : void
164 michael@paquier.xyz 1682 :GNC 67 : multixact_twophase_postcommit(FullTransactionId fxid, uint16 info,
1683 : : void *recdata, uint32 len)
1684 : : {
1685 : 67 : ProcNumber dummyProcNumber = TwoPhaseGetDummyProcNumber(fxid, true);
1686 : :
5869 heikki.linnakangas@i 1687 [ - + ]:CBC 67 : Assert(len == sizeof(MultiXactId));
1688 : :
655 1689 : 67 : OldestMemberMXactId[dummyProcNumber] = InvalidMultiXactId;
5869 1690 : 67 : }
1691 : :
1692 : : /*
1693 : : * multixact_twophase_postabort
1694 : : * This is actually just the same as the COMMIT case.
1695 : : */
1696 : : void
164 michael@paquier.xyz 1697 :GNC 26 : multixact_twophase_postabort(FullTransactionId fxid, uint16 info,
1698 : : void *recdata, uint32 len)
1699 : : {
1700 : 26 : multixact_twophase_postcommit(fxid, info, recdata, len);
5869 heikki.linnakangas@i 1701 :CBC 26 : }
1702 : :
1703 : : /*
1704 : : * Initialization of shared memory for MultiXact. We use two SLRU areas,
1705 : : * thus double memory. Also, reserve space for the shared MultiXactState
1706 : : * struct and the per-backend MultiXactId arrays (two of those, too).
1707 : : */
1708 : : Size
7539 tgl@sss.pgh.pa.us 1709 : 1990 : MultiXactShmemSize(void)
1710 : : {
1711 : : Size size;
1712 : :
1713 : : /* We need 2*MaxOldestSlot perBackendXactIds[] entries */
1714 : : #define SHARED_MULTIXACT_STATE_SIZE \
1715 : : add_size(offsetof(MultiXactStateData, perBackendXactIds), \
1716 : : mul_size(sizeof(MultiXactId) * 2, MaxOldestSlot))
1717 : :
7425 1718 : 1990 : size = SHARED_MULTIXACT_STATE_SIZE;
659 alvherre@alvh.no-ip. 1719 : 1990 : size = add_size(size, SimpleLruShmemSize(multixact_offset_buffers, 0));
1720 : 1990 : size = add_size(size, SimpleLruShmemSize(multixact_member_buffers, 0));
1721 : :
7425 tgl@sss.pgh.pa.us 1722 : 1990 : return size;
1723 : : }
1724 : :
1725 : : void
7539 1726 : 1071 : MultiXactShmemInit(void)
1727 : : {
1728 : : bool found;
1729 : :
1730 : : debug_elog2(DEBUG2, "Shared Memory Init for MultiXact");
1731 : :
1732 : 1071 : MultiXactOffsetCtl->PagePrecedes = MultiXactOffsetPagePrecedes;
1733 : 1071 : MultiXactMemberCtl->PagePrecedes = MultiXactMemberPagePrecedes;
1734 : :
7317 1735 : 1071 : SimpleLruInit(MultiXactOffsetCtl,
1736 : : "multixact_offset", multixact_offset_buffers, 0,
1737 : : "pg_multixact/offsets", LWTRANCHE_MULTIXACTOFFSET_BUFFER,
1738 : : LWTRANCHE_MULTIXACTOFFSET_SLRU,
1739 : : SYNC_HANDLER_MULTIXACT_OFFSET,
1740 : : false);
1797 noah@leadboat.com 1741 : 1071 : SlruPagePrecedesUnitTests(MultiXactOffsetCtl, MULTIXACT_OFFSETS_PER_PAGE);
7317 tgl@sss.pgh.pa.us 1742 : 1071 : SimpleLruInit(MultiXactMemberCtl,
1743 : : "multixact_member", multixact_member_buffers, 0,
1744 : : "pg_multixact/members", LWTRANCHE_MULTIXACTMEMBER_BUFFER,
1745 : : LWTRANCHE_MULTIXACTMEMBER_SLRU,
1746 : : SYNC_HANDLER_MULTIXACT_MEMBER,
1747 : : true);
1748 : : /* doesn't call SimpleLruTruncate() or meet criteria for unit tests */
1749 : :
1750 : : /* Initialize our shared state struct */
7539 1751 : 1071 : MultiXactState = ShmemInitStruct("Shared MultiXact State",
1752 : 1071 : SHARED_MULTIXACT_STATE_SIZE,
1753 : : &found);
1754 [ + - ]: 1071 : if (!IsUnderPostmaster)
1755 : : {
1756 [ - + ]: 1071 : Assert(!found);
1757 : :
1758 : : /* Make sure we zero out the per-backend state */
1759 [ + - + - : 18193 : MemSet(MultiXactState, 0, SHARED_MULTIXACT_STATE_SIZE);
+ - + + +
+ ]
1760 : : }
1761 : : else
7539 tgl@sss.pgh.pa.us 1762 [ # # ]:UBC 0 : Assert(found);
1763 : :
1764 : : /*
1765 : : * Set up array pointers.
1766 : : */
7539 tgl@sss.pgh.pa.us 1767 :CBC 1071 : OldestMemberMXactId = MultiXactState->perBackendXactIds;
1346 rhaas@postgresql.org 1768 : 1071 : OldestVisibleMXactId = OldestMemberMXactId + MaxOldestSlot;
7539 tgl@sss.pgh.pa.us 1769 : 1071 : }
1770 : :
1771 : : /*
1772 : : * GUC check_hook for multixact_offset_buffers
1773 : : */
1774 : : bool
659 alvherre@alvh.no-ip. 1775 : 1109 : check_multixact_offset_buffers(int *newval, void **extra, GucSource source)
1776 : : {
1777 : 1109 : return check_slru_buffers("multixact_offset_buffers", newval);
1778 : : }
1779 : :
1780 : : /*
1781 : : * GUC check_hook for multixact_member_buffers
1782 : : */
1783 : : bool
1784 : 1109 : check_multixact_member_buffers(int *newval, void **extra, GucSource source)
1785 : : {
1786 : 1109 : return check_slru_buffers("multixact_member_buffers", newval);
1787 : : }
1788 : :
1789 : : /*
1790 : : * This func must be called ONCE on system install. It creates the initial
1791 : : * MultiXact segments. (The MultiXacts directories are assumed to have been
1792 : : * created by initdb, and MultiXactShmemInit must have been called already.)
1793 : : */
1794 : : void
7539 tgl@sss.pgh.pa.us 1795 : 51 : BootStrapMultiXact(void)
1796 : : {
1797 : : /* Zero the initial pages and flush them to disk */
164 alvherre@kurilemu.de 1798 :GNC 51 : SimpleLruZeroAndWritePage(MultiXactOffsetCtl, 0);
1799 : 51 : SimpleLruZeroAndWritePage(MultiXactMemberCtl, 0);
7539 tgl@sss.pgh.pa.us 1800 :CBC 51 : }
1801 : :
1802 : : /*
1803 : : * This must be called ONCE during postmaster or standalone-backend startup.
1804 : : *
1805 : : * StartupXLOG has already established nextMXact/nextOffset by calling
1806 : : * MultiXactSetNextMXact and/or MultiXactAdvanceNextMXact, and the oldestMulti
1807 : : * info from pg_control and/or MultiXactAdvanceOldest, but we haven't yet
1808 : : * replayed WAL.
1809 : : */
1810 : : void
1811 : 927 : StartupMultiXact(void)
1812 : : {
4402 alvherre@alvh.no-ip. 1813 : 927 : MultiXactId multi = MultiXactState->nextMXact;
1814 : 927 : MultiXactOffset offset = MultiXactState->nextOffset;
1815 : : int64 pageno;
1816 : :
1817 : : /*
1818 : : * Initialize offset's idea of the latest page number.
1819 : : */
1820 : 927 : pageno = MultiXactIdToOffsetPage(multi);
681 1821 : 927 : pg_atomic_write_u64(&MultiXactOffsetCtl->shared->latest_page_number,
1822 : : pageno);
1823 : :
1824 : : /*
1825 : : * Initialize member's idea of the latest page number.
1826 : : */
4402 1827 : 927 : pageno = MXOffsetToMemberPage(offset);
681 1828 : 927 : pg_atomic_write_u64(&MultiXactMemberCtl->shared->latest_page_number,
1829 : : pageno);
4402 1830 : 927 : }
1831 : :
1832 : : /*
1833 : : * This must be called ONCE at the end of startup/recovery.
1834 : : */
1835 : : void
1836 : 872 : TrimMultiXact(void)
1837 : : {
1838 : : MultiXactId nextMXact;
1839 : : MultiXactOffset offset;
1840 : : MultiXactId oldestMXact;
1841 : : Oid oldestMXactDB;
1842 : : int64 pageno;
1843 : : int entryno;
1844 : : int flagsoff;
1845 : :
3736 andres@anarazel.de 1846 : 872 : LWLockAcquire(MultiXactGenLock, LW_SHARED);
1847 : 872 : nextMXact = MultiXactState->nextMXact;
1848 : 872 : offset = MultiXactState->nextOffset;
1849 : 872 : oldestMXact = MultiXactState->oldestMultiXactId;
1850 : 872 : oldestMXactDB = MultiXactState->oldestMultiXactDB;
1851 : 872 : LWLockRelease(MultiXactGenLock);
1852 : :
1853 : : /* Clean up offsets state */
1854 : :
1855 : : /*
1856 : : * (Re-)Initialize our idea of the latest page number for offsets.
1857 : : */
1858 : 872 : pageno = MultiXactIdToOffsetPage(nextMXact);
681 alvherre@alvh.no-ip. 1859 : 872 : pg_atomic_write_u64(&MultiXactOffsetCtl->shared->latest_page_number,
1860 : : pageno);
1861 : :
1862 : : /*
1863 : : * Set the offset of nextMXact on the offsets page. This is normally done
1864 : : * in RecordNewMultiXact() of the previous multixact, but let's be sure
1865 : : * the next page exists, if the nextMXact was reset with pg_resetwal for
1866 : : * example.
1867 : : *
1868 : : * Zero out the remainder of the page. See notes in TrimCLOG() for
1869 : : * background. Unlike CLOG, some WAL record covers every pg_multixact
1870 : : * SLRU mutation. Since, also unlike CLOG, we ignore the WAL rule "write
1871 : : * xlog before data," nextMXact successors may carry obsolete, nonzero
1872 : : * offset values.
1873 : : */
3736 andres@anarazel.de 1874 : 872 : entryno = MultiXactIdToOffsetEntry(nextMXact);
1875 : : {
1876 : : int slotno;
1877 : : MultiXactOffset *offptr;
659 alvherre@alvh.no-ip. 1878 : 872 : LWLock *lock = SimpleLruGetBankLock(MultiXactOffsetCtl, pageno);
1879 : :
1880 : 872 : LWLockAcquire(lock, LW_EXCLUSIVE);
6 heikki.linnakangas@i 1881 [ + + + + ]:GNC 872 : if (entryno == 0 || nextMXact == FirstMultiXactId)
15 heikki.linnakangas@i 1882 :CBC 855 : slotno = SimpleLruZeroPage(MultiXactOffsetCtl, pageno);
1883 : : else
1884 : 17 : slotno = SimpleLruReadPage(MultiXactOffsetCtl, pageno, true, nextMXact);
7498 tgl@sss.pgh.pa.us 1885 : 872 : offptr = (MultiXactOffset *) MultiXactOffsetCtl->shared->page_buffer[slotno];
1886 : 872 : offptr += entryno;
1887 : :
15 heikki.linnakangas@i 1888 : 872 : *offptr = offset;
1889 [ + + + - ]: 872 : if (entryno != 0 && (entryno + 1) * sizeof(MultiXactOffset) != BLCKSZ)
1890 [ + - + - : 1493 : MemSet(offptr + 1, 0, BLCKSZ - (entryno + 1) * sizeof(MultiXactOffset));
+ - + + +
+ ]
1891 : :
7348 tgl@sss.pgh.pa.us 1892 : 872 : MultiXactOffsetCtl->shared->page_dirty[slotno] = true;
659 alvherre@alvh.no-ip. 1893 : 872 : LWLockRelease(lock);
1894 : : }
1895 : :
1896 : : /*
1897 : : * And the same for members.
1898 : : *
1899 : : * (Re-)Initialize our idea of the latest page number for members.
1900 : : */
7498 tgl@sss.pgh.pa.us 1901 : 872 : pageno = MXOffsetToMemberPage(offset);
681 alvherre@alvh.no-ip. 1902 : 872 : pg_atomic_write_u64(&MultiXactMemberCtl->shared->latest_page_number,
1903 : : pageno);
1904 : :
1905 : : /*
1906 : : * Zero out the remainder of the current members page. See notes in
1907 : : * TrimCLOG() for motivation.
1908 : : */
4712 1909 : 872 : flagsoff = MXOffsetToFlagsOffset(offset);
1910 [ + + ]: 872 : if (flagsoff != 0)
1911 : : {
1912 : : int slotno;
1913 : : TransactionId *xidptr;
1914 : : int memberoff;
659 1915 : 16 : LWLock *lock = SimpleLruGetBankLock(MultiXactMemberCtl, pageno);
1916 : :
1917 : 16 : LWLockAcquire(lock, LW_EXCLUSIVE);
4712 1918 : 16 : memberoff = MXOffsetToMemberOffset(offset);
6714 tgl@sss.pgh.pa.us 1919 : 16 : slotno = SimpleLruReadPage(MultiXactMemberCtl, pageno, true, offset);
4712 alvherre@alvh.no-ip. 1920 : 16 : xidptr = (TransactionId *)
1921 : 16 : (MultiXactMemberCtl->shared->page_buffer[slotno] + memberoff);
1922 : :
1923 [ + + + - : 16 : MemSet(xidptr, 0, BLCKSZ - memberoff);
+ - - + -
- ]
1924 : :
1925 : : /*
1926 : : * Note: we don't need to zero out the flag bits in the remaining
1927 : : * members of the current group, because they are always reset before
1928 : : * writing.
1929 : : */
1930 : :
7348 tgl@sss.pgh.pa.us 1931 : 16 : MultiXactMemberCtl->shared->page_dirty[slotno] = true;
659 alvherre@alvh.no-ip. 1932 : 16 : LWLockRelease(lock);
1933 : : }
1934 : :
1935 : : /* signal that we're officially up */
3736 andres@anarazel.de 1936 : 872 : LWLockAcquire(MultiXactGenLock, LW_EXCLUSIVE);
1937 : 872 : MultiXactState->finishedStartup = true;
3849 rhaas@postgresql.org 1938 : 872 : LWLockRelease(MultiXactGenLock);
1939 : :
1940 : : /* Now compute how far away the next multixid wraparound is. */
9 heikki.linnakangas@i 1941 :GNC 872 : SetMultiXactIdLimit(oldestMXact, oldestMXactDB);
7539 tgl@sss.pgh.pa.us 1942 :CBC 872 : }
1943 : :
1944 : : /*
1945 : : * Get the MultiXact data to save in a checkpoint record
1946 : : */
1947 : : void
7498 1948 : 1532 : MultiXactGetCheckptMulti(bool is_shutdown,
1949 : : MultiXactId *nextMulti,
1950 : : MultiXactOffset *nextMultiOffset,
1951 : : MultiXactId *oldestMulti,
1952 : : Oid *oldestMultiDB)
1953 : : {
7539 1954 : 1532 : LWLockAcquire(MultiXactGenLock, LW_SHARED);
7498 1955 : 1532 : *nextMulti = MultiXactState->nextMXact;
1956 : 1532 : *nextMultiOffset = MultiXactState->nextOffset;
4712 alvherre@alvh.no-ip. 1957 : 1532 : *oldestMulti = MultiXactState->oldestMultiXactId;
1958 : 1532 : *oldestMultiDB = MultiXactState->oldestMultiXactDB;
7539 tgl@sss.pgh.pa.us 1959 : 1532 : LWLockRelease(MultiXactGenLock);
1960 : :
1961 : : debug_elog6(DEBUG2,
1962 : : "MultiXact: checkpoint is nextMulti %u, nextOffset %" PRIu64 ", oldestMulti %u in DB %u",
1963 : : *nextMulti, *nextMultiOffset, *oldestMulti, *oldestMultiDB);
1964 : 1532 : }
1965 : :
1966 : : /*
1967 : : * Perform a checkpoint --- either during shutdown, or on-the-fly
1968 : : */
1969 : : void
1970 : 1732 : CheckPointMultiXact(void)
1971 : : {
1972 : : TRACE_POSTGRESQL_MULTIXACT_CHECKPOINT_START(true);
1973 : :
1974 : : /*
1975 : : * Write dirty MultiXact pages to disk. This may result in sync requests
1976 : : * queued for later handling by ProcessSyncRequests(), as part of the
1977 : : * checkpoint.
1978 : : */
1910 tmunro@postgresql.or 1979 : 1732 : SimpleLruWriteAll(MultiXactOffsetCtl, true);
1980 : 1732 : SimpleLruWriteAll(MultiXactMemberCtl, true);
1981 : :
1982 : : TRACE_POSTGRESQL_MULTIXACT_CHECKPOINT_DONE(true);
7539 tgl@sss.pgh.pa.us 1983 : 1732 : }
1984 : :
1985 : : /*
1986 : : * Set the next-to-be-assigned MultiXactId and offset
1987 : : *
1988 : : * This is used when we can determine the correct next ID/offset exactly
1989 : : * from a checkpoint record. Although this is only called during bootstrap
1990 : : * and XLog replay, we take the lock in case any hot-standby backends are
1991 : : * examining the values.
1992 : : */
1993 : : void
7498 1994 : 1009 : MultiXactSetNextMXact(MultiXactId nextMulti,
1995 : : MultiXactOffset nextMultiOffset)
1996 : : {
6 heikki.linnakangas@i 1997 [ - + ]:GNC 1009 : Assert(MultiXactIdIsValid(nextMulti));
1998 : : debug_elog4(DEBUG2, "MultiXact: setting next multi to %u offset %" PRIu64,
1999 : : nextMulti, nextMultiOffset);
2000 : :
5064 tgl@sss.pgh.pa.us 2001 :CBC 1009 : LWLockAcquire(MultiXactGenLock, LW_EXCLUSIVE);
7539 2002 : 1009 : MultiXactState->nextMXact = nextMulti;
7498 2003 : 1009 : MultiXactState->nextOffset = nextMultiOffset;
5064 2004 : 1009 : LWLockRelease(MultiXactGenLock);
7539 2005 : 1009 : }
2006 : :
2007 : : /*
2008 : : * Determine the last safe MultiXactId to allocate given the currently oldest
2009 : : * datminmxid (ie, the oldest MultiXactId that might exist in any database
2010 : : * of our cluster), and the OID of the (or a) database with that value.
2011 : : *
2012 : : * This also updates MultiXactState->oldestOffset, by looking up the offset of
2013 : : * MultiXactState->oldestMultiXactId.
2014 : : */
2015 : : void
9 heikki.linnakangas@i 2016 :GNC 1953 : SetMultiXactIdLimit(MultiXactId oldest_datminmxid, Oid oldest_datoid)
2017 : : {
2018 : : MultiXactId multiVacLimit;
2019 : : MultiXactId multiWarnLimit;
2020 : : MultiXactId multiStopLimit;
2021 : : MultiXactId multiWrapLimit;
2022 : : MultiXactId curMulti;
2023 : :
4712 alvherre@alvh.no-ip. 2024 [ - + ]:CBC 1953 : Assert(MultiXactIdIsValid(oldest_datminmxid));
2025 : :
2026 : : /*
2027 : : * We pretend that a wrap will happen halfway through the multixact ID
2028 : : * space, but that's not really true, because multixacts wrap differently
2029 : : * from transaction IDs.
2030 : : */
2031 : 1953 : multiWrapLimit = oldest_datminmxid + (MaxMultiXactId >> 1);
2032 [ - + ]: 1953 : if (multiWrapLimit < FirstMultiXactId)
4712 alvherre@alvh.no-ip. 2033 :UBC 0 : multiWrapLimit += FirstMultiXactId;
2034 : :
2035 : : /*
2036 : : * We'll refuse to continue assigning MultiXactIds once we get within 3M
2037 : : * multi of data loss. See SetTransactionIdLimit.
2038 : : */
1965 noah@leadboat.com 2039 :CBC 1953 : multiStopLimit = multiWrapLimit - 3000000;
4712 alvherre@alvh.no-ip. 2040 [ - + ]: 1953 : if (multiStopLimit < FirstMultiXactId)
4712 alvherre@alvh.no-ip. 2041 :UBC 0 : multiStopLimit -= FirstMultiXactId;
2042 : :
2043 : : /*
2044 : : * We'll start complaining loudly when we get within 40M multis of data
2045 : : * loss. This is kind of arbitrary, but if you let your gas gauge get
2046 : : * down to 2% of full, would you be looking for the next gas station? We
2047 : : * need to be fairly liberal about this number because there are lots of
2048 : : * scenarios where most transactions are done by automatic clients that
2049 : : * won't pay attention to warnings. (No, we're not gonna make this
2050 : : * configurable. If you know enough to configure it, you know enough to
2051 : : * not get in this kind of trouble in the first place.)
2052 : : */
1965 noah@leadboat.com 2053 :CBC 1953 : multiWarnLimit = multiWrapLimit - 40000000;
4712 alvherre@alvh.no-ip. 2054 [ - + ]: 1953 : if (multiWarnLimit < FirstMultiXactId)
4712 alvherre@alvh.no-ip. 2055 :UBC 0 : multiWarnLimit -= FirstMultiXactId;
2056 : :
2057 : : /*
2058 : : * We'll start trying to force autovacuums when oldest_datminmxid gets to
2059 : : * be more than autovacuum_multixact_freeze_max_age mxids old.
2060 : : *
2061 : : * Note: autovacuum_multixact_freeze_max_age is a PGC_POSTMASTER parameter
2062 : : * so that we don't have to worry about dealing with on-the-fly changes in
2063 : : * its value. See SetTransactionIdLimit.
2064 : : */
4326 alvherre@alvh.no-ip. 2065 :CBC 1953 : multiVacLimit = oldest_datminmxid + autovacuum_multixact_freeze_max_age;
4712 2066 [ - + ]: 1953 : if (multiVacLimit < FirstMultiXactId)
4712 alvherre@alvh.no-ip. 2067 :UBC 0 : multiVacLimit += FirstMultiXactId;
2068 : :
2069 : : /* Grab lock for just long enough to set the new limit values */
4712 alvherre@alvh.no-ip. 2070 :CBC 1953 : LWLockAcquire(MultiXactGenLock, LW_EXCLUSIVE);
2071 : 1953 : MultiXactState->oldestMultiXactId = oldest_datminmxid;
2072 : 1953 : MultiXactState->oldestMultiXactDB = oldest_datoid;
2073 : 1953 : MultiXactState->multiVacLimit = multiVacLimit;
2074 : 1953 : MultiXactState->multiWarnLimit = multiWarnLimit;
2075 : 1953 : MultiXactState->multiStopLimit = multiStopLimit;
2076 : 1953 : MultiXactState->multiWrapLimit = multiWrapLimit;
2077 : 1953 : curMulti = MultiXactState->nextMXact;
2078 : 1953 : LWLockRelease(MultiXactGenLock);
2079 : :
2080 : : /* Log the info */
2081 [ + + ]: 1953 : ereport(DEBUG1,
2082 : : (errmsg_internal("MultiXactId wrap limit is %u, limited by database with OID %u",
2083 : : multiWrapLimit, oldest_datoid)));
2084 : :
2085 : : /*
2086 : : * Computing the actual limits is only possible once the data directory is
2087 : : * in a consistent state. There's no need to compute the limits while
2088 : : * still replaying WAL - no decisions about new multis are made even
2089 : : * though multixact creations might be replayed. So we'll only do further
2090 : : * checks after TrimMultiXact() has been called.
2091 : : */
3736 andres@anarazel.de 2092 [ + + ]: 1953 : if (!MultiXactState->finishedStartup)
2093 : 978 : return;
2094 : :
2095 [ - + ]: 975 : Assert(!InRecovery);
2096 : :
2097 : : /*
2098 : : * Offsets are 64-bits wide and never wrap around, so we don't need to
2099 : : * consider them for emergency autovacuum purposes. But now that we're in
2100 : : * a consistent state, determine MultiXactState->oldestOffset. It will be
2101 : : * used to adjust the freezing cutoff, to keep the offsets disk usage in
2102 : : * check.
2103 : : */
9 heikki.linnakangas@i 2104 :GNC 975 : SetOldestOffset();
2105 : :
2106 : : /*
2107 : : * If past the autovacuum force point, immediately signal an autovac
2108 : : * request. The reason for this is that autovac only processes one
2109 : : * database per invocation. Once it's finished cleaning up the oldest
2110 : : * database, it'll call here, and we'll signal the postmaster to start
2111 : : * another iteration immediately if there are still any old databases.
2112 : : */
2113 [ - + - - ]: 975 : if (MultiXactIdPrecedes(multiVacLimit, curMulti) && IsUnderPostmaster)
4712 alvherre@alvh.no-ip. 2114 :UBC 0 : SendPostmasterSignal(PMSIGNAL_START_AUTOVAC_LAUNCHER);
2115 : :
2116 : : /* Give an immediate warning if past the wrap warn point */
3736 andres@anarazel.de 2117 [ - + ]:CBC 975 : if (MultiXactIdPrecedes(multiWarnLimit, curMulti))
2118 : : {
2119 : : char *oldest_datname;
2120 : :
2121 : : /*
2122 : : * We can be called when not inside a transaction, for example during
2123 : : * StartupXLOG(). In such a case we cannot do database access, so we
2124 : : * must just report the oldest DB's OID.
2125 : : *
2126 : : * Note: it's also possible that get_database_name fails and returns
2127 : : * NULL, for example because the database just got dropped. We'll
2128 : : * still warn, even though the warning might now be unnecessary.
2129 : : */
4712 alvherre@alvh.no-ip. 2130 [ # # ]:UBC 0 : if (IsTransactionState())
2131 : 0 : oldest_datname = get_database_name(oldest_datoid);
2132 : : else
2133 : 0 : oldest_datname = NULL;
2134 : :
2135 [ # # ]: 0 : if (oldest_datname)
2136 [ # # ]: 0 : ereport(WARNING,
2137 : : (errmsg_plural("database \"%s\" must be vacuumed before %u more MultiXactId is used",
2138 : : "database \"%s\" must be vacuumed before %u more MultiXactIds are used",
2139 : : multiWrapLimit - curMulti,
2140 : : oldest_datname,
2141 : : multiWrapLimit - curMulti),
2142 : : errhint("To avoid MultiXactId assignment failures, execute a database-wide VACUUM in that database.\n"
2143 : : "You might also need to commit or roll back old prepared transactions, or drop stale replication slots.")));
2144 : : else
2145 [ # # ]: 0 : ereport(WARNING,
2146 : : (errmsg_plural("database with OID %u must be vacuumed before %u more MultiXactId is used",
2147 : : "database with OID %u must be vacuumed before %u more MultiXactIds are used",
2148 : : multiWrapLimit - curMulti,
2149 : : oldest_datoid,
2150 : : multiWrapLimit - curMulti),
2151 : : errhint("To avoid MultiXactId assignment failures, execute a database-wide VACUUM in that database.\n"
2152 : : "You might also need to commit or roll back old prepared transactions, or drop stale replication slots.")));
2153 : : }
2154 : : }
2155 : :
2156 : : /*
2157 : : * Ensure the next-to-be-assigned MultiXactId is at least minMulti,
2158 : : * and similarly nextOffset is at least minMultiOffset.
2159 : : *
2160 : : * This is used when we can determine minimum safe values from an XLog
2161 : : * record (either an on-line checkpoint or an mxact creation log entry).
2162 : : * Although this is only called during XLog replay, we take the lock in case
2163 : : * any hot-standby backends are examining the values.
2164 : : */
2165 : : void
7498 tgl@sss.pgh.pa.us 2166 :CBC 673 : MultiXactAdvanceNextMXact(MultiXactId minMulti,
2167 : : MultiXactOffset minMultiOffset)
2168 : : {
6 heikki.linnakangas@i 2169 [ - + ]:GNC 673 : Assert(MultiXactIdIsValid(minMulti));
2170 : :
5064 tgl@sss.pgh.pa.us 2171 :CBC 673 : LWLockAcquire(MultiXactGenLock, LW_EXCLUSIVE);
7539 2172 [ + + ]: 673 : if (MultiXactIdPrecedes(MultiXactState->nextMXact, minMulti))
2173 : : {
2174 : : debug_elog3(DEBUG2, "MultiXact: setting next multi to %u", minMulti);
2175 : 2 : MultiXactState->nextMXact = minMulti;
2176 : : }
9 heikki.linnakangas@i 2177 [ + + ]:GNC 673 : if (MultiXactState->nextOffset < minMultiOffset)
2178 : : {
2179 : : debug_elog3(DEBUG2, "MultiXact: setting next offset to %" PRIu64,
2180 : : minMultiOffset);
7498 tgl@sss.pgh.pa.us 2181 :CBC 2 : MultiXactState->nextOffset = minMultiOffset;
2182 : : }
5064 2183 : 673 : LWLockRelease(MultiXactGenLock);
7539 2184 : 673 : }
2185 : :
2186 : : /*
2187 : : * Update our oldestMultiXactId value, but only if it's more recent than what
2188 : : * we had.
2189 : : *
2190 : : * This may only be called during WAL replay.
2191 : : */
2192 : : void
4712 alvherre@alvh.no-ip. 2193 : 702 : MultiXactAdvanceOldest(MultiXactId oldestMulti, Oid oldestMultiDB)
2194 : : {
3736 andres@anarazel.de 2195 [ - + ]: 702 : Assert(InRecovery);
2196 : :
4712 alvherre@alvh.no-ip. 2197 [ - + ]: 702 : if (MultiXactIdPrecedes(MultiXactState->oldestMultiXactId, oldestMulti))
9 heikki.linnakangas@i 2198 :UNC 0 : SetMultiXactIdLimit(oldestMulti, oldestMultiDB);
4192 alvherre@alvh.no-ip. 2199 :CBC 702 : }
2200 : :
2201 : : /*
2202 : : * Make sure that MultiXactOffset has room for a newly-allocated MultiXactId.
2203 : : *
2204 : : * NB: this is called while holding MultiXactGenLock. We want it to be very
2205 : : * fast most of the time; even when it's not so fast, no actual I/O need
2206 : : * happen unless we're forced to write out a dirty log or xlog page to make
2207 : : * room in shared memory.
2208 : : */
2209 : : static void
7539 tgl@sss.pgh.pa.us 2210 : 5309 : ExtendMultiXactOffset(MultiXactId multi)
2211 : : {
2212 : : int64 pageno;
2213 : : LWLock *lock;
2214 : :
2215 : : /*
2216 : : * No work except at first MultiXactId of a page. But beware: just after
2217 : : * wraparound, the first MultiXactId of page zero is FirstMultiXactId.
2218 : : */
2219 [ + + + + ]: 5309 : if (MultiXactIdToOffsetEntry(multi) != 0 &&
2220 : : multi != FirstMultiXactId)
2221 : 5304 : return;
2222 : :
7539 tgl@sss.pgh.pa.us 2223 :GBC 5 : pageno = MultiXactIdToOffsetPage(multi);
659 alvherre@alvh.no-ip. 2224 : 5 : lock = SimpleLruGetBankLock(MultiXactOffsetCtl, pageno);
2225 : :
2226 : 5 : LWLockAcquire(lock, LW_EXCLUSIVE);
2227 : :
2228 : : /* Zero the page and make a WAL entry about it */
164 alvherre@kurilemu.de 2229 :GNC 5 : SimpleLruZeroPage(MultiXactOffsetCtl, pageno);
2230 : 5 : XLogSimpleInsertInt64(RM_MULTIXACT_ID, XLOG_MULTIXACT_ZERO_OFF_PAGE,
2231 : : pageno);
2232 : :
659 alvherre@alvh.no-ip. 2233 :GBC 5 : LWLockRelease(lock);
2234 : : }
2235 : :
2236 : : /*
2237 : : * Make sure that MultiXactMember has room for the members of a newly-
2238 : : * allocated MultiXactId.
2239 : : *
2240 : : * Like the above routine, this is called while holding MultiXactGenLock;
2241 : : * same comments apply.
2242 : : */
2243 : : static void
7498 tgl@sss.pgh.pa.us 2244 :CBC 5309 : ExtendMultiXactMember(MultiXactOffset offset, int nmembers)
2245 : : {
2246 : : /*
2247 : : * It's possible that the members span more than one page of the members
2248 : : * file, so we loop to ensure we consider each page. The coding is not
2249 : : * optimal if the members span several pages, but that seems unusual
2250 : : * enough to not worry much about.
2251 : : */
2252 [ + + ]: 10672 : while (nmembers > 0)
2253 : : {
2254 : : int flagsoff;
2255 : : int flagsbit;
2256 : : uint32 difference;
2257 : :
2258 : : /*
2259 : : * Only zero when at first entry of a page.
2260 : : */
4712 alvherre@alvh.no-ip. 2261 : 5363 : flagsoff = MXOffsetToFlagsOffset(offset);
2262 : 5363 : flagsbit = MXOffsetToFlagsBitShift(offset);
2263 [ + + + + ]: 5363 : if (flagsoff == 0 && flagsbit == 0)
2264 : : {
2265 : : int64 pageno;
2266 : : LWLock *lock;
2267 : :
7498 tgl@sss.pgh.pa.us 2268 : 57 : pageno = MXOffsetToMemberPage(offset);
659 alvherre@alvh.no-ip. 2269 : 57 : lock = SimpleLruGetBankLock(MultiXactMemberCtl, pageno);
2270 : :
2271 : 57 : LWLockAcquire(lock, LW_EXCLUSIVE);
2272 : :
2273 : : /* Zero the page and make a WAL entry about it */
164 alvherre@kurilemu.de 2274 :GNC 57 : SimpleLruZeroPage(MultiXactMemberCtl, pageno);
2275 : 57 : XLogSimpleInsertInt64(RM_MULTIXACT_ID,
2276 : : XLOG_MULTIXACT_ZERO_MEM_PAGE, pageno);
2277 : :
659 alvherre@alvh.no-ip. 2278 :CBC 57 : LWLockRelease(lock);
2279 : : }
2280 : :
2281 : : /* Compute the number of items till end of current page. */
9 heikki.linnakangas@i 2282 :GNC 5363 : difference = MULTIXACT_MEMBERS_PER_PAGE - offset % MULTIXACT_MEMBERS_PER_PAGE;
2283 : :
2284 : : /*
2285 : : * Advance to next page. OK if nmembers goes negative.
2286 : : */
4210 alvherre@alvh.no-ip. 2287 :CBC 5363 : nmembers -= difference;
2288 : 5363 : offset += difference;
2289 : : }
7539 tgl@sss.pgh.pa.us 2290 : 5309 : }
2291 : :
2292 : : /*
2293 : : * GetOldestMultiXactId
2294 : : *
2295 : : * Return the oldest MultiXactId that's possibly still seen as live by
2296 : : * any running transaction. Older ones might still exist on disk, but they no
2297 : : * longer have any running member transaction.
2298 : : *
2299 : : * It's not safe to truncate MultiXact SLRU segments based on the value returned by
2300 : : * this function; however, it can be set as the new relminmxid for any table
2301 : : * that VACUUM knows has no remaining MXIDs < the same value. It is only safe
2302 : : * to truncate SLRUs when no table can possibly still have a referencing MXID.
2303 : : */
2304 : : MultiXactId
4712 alvherre@alvh.no-ip. 2305 : 48385 : GetOldestMultiXactId(void)
2306 : : {
2307 : : MultiXactId oldestMXact;
2308 : : int i;
2309 : :
2310 : : /*
2311 : : * This is the oldest valid value among all the OldestMemberMXactId[] and
2312 : : * OldestVisibleMXactId[] entries, or nextMXact if none are valid.
2313 : : */
7539 tgl@sss.pgh.pa.us 2314 : 48385 : LWLockAcquire(MultiXactGenLock, LW_SHARED);
6 heikki.linnakangas@i 2315 :GNC 48385 : oldestMXact = MultiXactState->nextMXact;
655 heikki.linnakangas@i 2316 [ + + ]:CBC 5789379 : for (i = 0; i < MaxOldestSlot; i++)
2317 : : {
2318 : : MultiXactId thisoldest;
2319 : :
7539 tgl@sss.pgh.pa.us 2320 : 5740994 : thisoldest = OldestMemberMXactId[i];
2321 [ + + + + ]: 5770008 : if (MultiXactIdIsValid(thisoldest) &&
2322 : 29014 : MultiXactIdPrecedes(thisoldest, oldestMXact))
2323 : 9 : oldestMXact = thisoldest;
2324 : 5740994 : thisoldest = OldestVisibleMXactId[i];
2325 [ + + + + ]: 5741120 : if (MultiXactIdIsValid(thisoldest) &&
2326 : 126 : MultiXactIdPrecedes(thisoldest, oldestMXact))
2327 : 2 : oldestMXact = thisoldest;
2328 : : }
2329 : :
2330 : 48385 : LWLockRelease(MultiXactGenLock);
2331 : :
4712 alvherre@alvh.no-ip. 2332 : 48385 : return oldestMXact;
2333 : : }
2334 : :
2335 : : /*
2336 : : * Calculate the oldest member offset and install it in MultiXactState, where
2337 : : * it can be used to adjust multixid freezing cutoffs.
2338 : : */
2339 : : static void
9 heikki.linnakangas@i 2340 :GNC 975 : SetOldestOffset(void)
2341 : : {
2342 : : MultiXactId oldestMultiXactId;
2343 : : MultiXactId nextMXact;
3736 andres@anarazel.de 2344 :CBC 975 : MultiXactOffset oldestOffset = 0; /* placate compiler */
2345 : : MultiXactOffset nextOffset;
3849 rhaas@postgresql.org 2346 : 975 : bool oldestOffsetKnown = false;
2347 : :
2348 : : /*
2349 : : * NB: Have to prevent concurrent truncation; we might otherwise try to
2350 : : * look up an oldestMulti that's concurrently getting truncated away.
2351 : : */
3736 andres@anarazel.de 2352 : 975 : LWLockAcquire(MultiXactTruncationLock, LW_SHARED);
2353 : :
2354 : : /* Read relevant fields from shared memory. */
3849 rhaas@postgresql.org 2355 : 975 : LWLockAcquire(MultiXactGenLock, LW_SHARED);
2356 : 975 : oldestMultiXactId = MultiXactState->oldestMultiXactId;
2357 : 975 : nextMXact = MultiXactState->nextMXact;
2358 : 975 : nextOffset = MultiXactState->nextOffset;
3736 andres@anarazel.de 2359 [ - + ]: 975 : Assert(MultiXactState->finishedStartup);
3887 alvherre@alvh.no-ip. 2360 : 975 : LWLockRelease(MultiXactGenLock);
2361 : :
2362 : : /*
2363 : : * Determine the offset of the oldest multixact. Normally, we can read
2364 : : * the offset from the multixact itself, but there's an important special
2365 : : * case: if there are no multixacts in existence at all, oldestMultiXactId
2366 : : * obviously can't point to one. It will instead point to the multixact
2367 : : * ID that will be assigned the next time one is needed.
2368 : : */
3849 rhaas@postgresql.org 2369 [ + + ]: 975 : if (oldestMultiXactId == nextMXact)
2370 : : {
2371 : : /*
2372 : : * When the next multixact gets created, it will be stored at the next
2373 : : * offset.
2374 : : */
2375 : 956 : oldestOffset = nextOffset;
2376 : 956 : oldestOffsetKnown = true;
2377 : : }
2378 : : else
2379 : : {
2380 : : /*
2381 : : * Look up the offset at which the oldest existing multixact's members
2382 : : * are stored. If we cannot find it, be careful not to fail, and
2383 : : * leave oldestOffset unchanged. oldestOffset is initialized to zero
2384 : : * at system startup, which prevents truncating members until a proper
2385 : : * value is calculated.
2386 : : *
2387 : : * (We had bugs in early releases of PostgreSQL 9.3.X and 9.4.X where
2388 : : * the supposedly-earliest multixact might not really exist. Those
2389 : : * should be long gone by now, so this should not fail, but let's
2390 : : * still be defensive.)
2391 : : */
2392 : : oldestOffsetKnown =
2393 : 19 : find_multixact_start(oldestMultiXactId, &oldestOffset);
2394 : :
2395 [ + - ]: 19 : if (oldestOffsetKnown)
2396 [ - + ]: 19 : ereport(DEBUG1,
2397 : : (errmsg_internal("oldest MultiXactId member is at offset %" PRIu64,
2398 : : oldestOffset)));
2399 : : else
3736 andres@anarazel.de 2400 [ # # ]:UBC 0 : ereport(LOG,
2401 : : (errmsg("MultiXact member truncation is disabled because oldest checkpointed MultiXact %u does not exist on disk",
2402 : : oldestMultiXactId)));
2403 : : }
2404 : :
3736 andres@anarazel.de 2405 :CBC 975 : LWLockRelease(MultiXactTruncationLock);
2406 : :
2407 : : /* Install the computed value */
2408 [ + - ]: 975 : if (oldestOffsetKnown)
2409 : : {
9 heikki.linnakangas@i 2410 :GNC 975 : LWLockAcquire(MultiXactGenLock, LW_EXCLUSIVE);
2411 : 975 : MultiXactState->oldestOffset = oldestOffset;
2412 : 975 : LWLockRelease(MultiXactGenLock);
2413 : : }
3887 alvherre@alvh.no-ip. 2414 :GIC 975 : }
2415 : :
2416 : : /*
2417 : : * Find the starting offset of the given MultiXactId.
2418 : : *
2419 : : * Returns false if the file containing the multi does not exist on disk.
2420 : : * Otherwise, returns true and sets *result to the starting member offset.
2421 : : *
2422 : : * This function does not prevent concurrent truncation, so if that's
2423 : : * required, the caller has to protect against that.
2424 : : */
2425 : : static bool
3849 rhaas@postgresql.org 2426 :CBC 19 : find_multixact_start(MultiXactId multi, MultiXactOffset *result)
2427 : : {
2428 : : MultiXactOffset offset;
2429 : : int64 pageno;
2430 : : int entryno;
2431 : : int slotno;
2432 : : MultiXactOffset *offptr;
2433 : :
3736 andres@anarazel.de 2434 [ - + ]: 19 : Assert(MultiXactState->finishedStartup);
2435 : :
3887 alvherre@alvh.no-ip. 2436 : 19 : pageno = MultiXactIdToOffsetPage(multi);
2437 : 19 : entryno = MultiXactIdToOffsetEntry(multi);
2438 : :
2439 : : /*
2440 : : * Write out dirty data, so SimpleLruDoesPhysicalPageExist can work correctly.
2441 : : */
1910 tmunro@postgresql.or 2442 : 19 : SimpleLruWriteAll(MultiXactOffsetCtl, true);
2443 : 19 : SimpleLruWriteAll(MultiXactMemberCtl, true);
2444 : :
3849 rhaas@postgresql.org 2445 [ - + ]: 19 : if (!SimpleLruDoesPhysicalPageExist(MultiXactOffsetCtl, pageno))
3849 rhaas@postgresql.org 2446 :UBC 0 : return false;
2447 : :
2448 : : /* lock is acquired by SimpleLruReadPage_ReadOnly */
3887 alvherre@alvh.no-ip. 2449 :CBC 19 : slotno = SimpleLruReadPage_ReadOnly(MultiXactOffsetCtl, pageno, multi);
2450 : 19 : offptr = (MultiXactOffset *) MultiXactOffsetCtl->shared->page_buffer[slotno];
2451 : 19 : offptr += entryno;
2452 : 19 : offset = *offptr;
659 2453 : 19 : LWLockRelease(SimpleLruGetBankLock(MultiXactOffsetCtl, pageno));
2454 : :
3849 rhaas@postgresql.org 2455 : 19 : *result = offset;
2456 : 19 : return true;
2457 : : }
2458 : :
2459 : : /*
2460 : : * GetMultiXactInfo
2461 : : *
2462 : : * Returns information about the current MultiXact state, as follows:
2463 : : * multixacts: Number of MultiXacts (nextMultiXactId - oldestMultiXactId)
2464 : : * members: Number of member entries (nextOffset - oldestOffset)
2465 : : * oldestMultiXactId: Oldest MultiXact ID still in use
2466 : : * oldestOffset: Oldest offset still in use
2467 : : */
2468 : : void
121 michael@paquier.xyz 2469 :GNC 13920 : GetMultiXactInfo(uint32 *multixacts, MultiXactOffset *members,
2470 : : MultiXactId *oldestMultiXactId, MultiXactOffset *oldestOffset)
2471 : : {
2472 : : MultiXactOffset nextOffset;
2473 : : MultiXactId nextMultiXactId;
2474 : :
3877 rhaas@postgresql.org 2475 :CBC 13920 : LWLockAcquire(MultiXactGenLock, LW_SHARED);
2476 : 13920 : nextOffset = MultiXactState->nextOffset;
121 michael@paquier.xyz 2477 :GNC 13920 : *oldestMultiXactId = MultiXactState->oldestMultiXactId;
3877 rhaas@postgresql.org 2478 :CBC 13920 : nextMultiXactId = MultiXactState->nextMXact;
121 michael@paquier.xyz 2479 :GNC 13920 : *oldestOffset = MultiXactState->oldestOffset;
3877 rhaas@postgresql.org 2480 :CBC 13920 : LWLockRelease(MultiXactGenLock);
2481 : :
121 michael@paquier.xyz 2482 :GNC 13920 : *members = nextOffset - *oldestOffset;
2483 : 13920 : *multixacts = nextMultiXactId - *oldestMultiXactId;
3877 rhaas@postgresql.org 2484 :GIC 13920 : }
2485 : :
2486 : : /*
2487 : : * Multixact members can be removed once the multixacts that refer to them
2488 : : * are older than every datminmxid. autovacuum_multixact_freeze_max_age and
2489 : : * vacuum_multixact_freeze_table_age work together to make sure we never have
2490 : : * too many multixacts; we hope that, at least under normal circumstances,
2491 : : * this will also be sufficient to keep us from using too many offsets.
2492 : : * However, if the average multixact has many members, we might accumulate a
2493 : : * large amount of members, consuming disk space, while still using few enough
2494 : : * multixids that the multixid limits fail to trigger relminmxid advancement
2495 : : * by VACUUM.
2496 : : *
2497 : : * To prevent that, if the members space usage exceeds a threshold
2498 : : * (MULTIXACT_MEMBER_LOW_THRESHOLD), we effectively reduce
2499 : : * autovacuum_multixact_freeze_max_age to a value just less than the number of
2500 : : * multixacts in use. We hope that this will quickly trigger autovacuuming on
2501 : : * the table or tables with the oldest relminmxid, thus allowing datminmxid
2502 : : * values to advance and removing some members.
2503 : : *
2504 : : * As the amount of the member space in use grows, we become more aggressive
2505 : : * in clamping this value. That not only causes autovacuum to ramp up, but
2506 : : * also makes any manual vacuums the user issues more aggressive. This
2507 : : * happens because vacuum_get_cutoffs() will clamp the freeze table and the
2508 : : * minimum freeze age cutoffs based on the effective
2509 : : * autovacuum_multixact_freeze_max_age this function returns. At the extreme,
2510 : : * when the members usage reaches MULTIXACT_MEMBER_HIGH_THRESHOLD, we clamp
2511 : : * freeze_max_age to zero, and every vacuum of any table will freeze every
2512 : : * multixact.
2513 : : */
2514 : : int
3877 rhaas@postgresql.org 2515 :CBC 13920 : MultiXactMemberFreezeThreshold(void)
2516 : : {
2517 : : MultiXactOffset members;
2518 : : uint32 multixacts;
2519 : : uint32 victim_multixacts;
2520 : : double fraction;
2521 : : int result;
2522 : : MultiXactId oldestMultiXactId;
2523 : : MultiXactOffset oldestOffset;
2524 : :
2525 : : /* Read the current offsets and members usage. */
9 heikki.linnakangas@i 2526 :GNC 13920 : GetMultiXactInfo(&multixacts, &members, &oldestMultiXactId, &oldestOffset);
2527 : :
2528 : : /* If member space utilization is low, no special action is required. */
2529 [ + - ]: 13920 : if (members <= MULTIXACT_MEMBER_LOW_THRESHOLD)
3877 rhaas@postgresql.org 2530 :CBC 13920 : return autovacuum_multixact_freeze_max_age;
2531 : :
2532 : : /*
2533 : : * Compute a target for relminmxid advancement. The number of multixacts
2534 : : * we try to eliminate from the system is based on how far we are past
2535 : : * MULTIXACT_MEMBER_LOW_THRESHOLD.
2536 : : *
2537 : : * The way this formula works is that when members is exactly at the low
2538 : : * threshold, fraction = 0.0, and we set freeze_max_age equal to
2539 : : * mxid_age(oldestMultiXactId). As members grows further, towards the
2540 : : * high threshold, fraction grows linearly from 0.0 to 1.0, and the result
2541 : : * shrinks from mxid_age(oldestMultiXactId) to 0. Beyond the high
2542 : : * threshold, fraction > 1.0 and the result is clamped to 0.
2543 : : */
9 heikki.linnakangas@i 2544 :UNC 0 : fraction = (double) (members - MULTIXACT_MEMBER_LOW_THRESHOLD) /
2545 : : (MULTIXACT_MEMBER_HIGH_THRESHOLD - MULTIXACT_MEMBER_LOW_THRESHOLD);
2546 : :
2547 : : /* fraction could be > 1.0, but lowest possible freeze age is zero */
2548 [ # # ]: 0 : if (fraction >= 1.0)
3877 rhaas@postgresql.org 2549 :UBC 0 : return 0;
2550 : :
9 heikki.linnakangas@i 2551 :UNC 0 : victim_multixacts = multixacts * fraction;
553 heikki.linnakangas@i 2552 :UBC 0 : result = multixacts - victim_multixacts;
2553 : :
2554 : : /*
2555 : : * Clamp to autovacuum_multixact_freeze_max_age, so that we never make
2556 : : * autovacuum less aggressive than it would otherwise be.
2557 : : */
2558 : 0 : return Min(result, autovacuum_multixact_freeze_max_age);
2559 : : }
2560 : :
2561 : : typedef struct mxtruncinfo
2562 : : {
2563 : : int64 earliestExistingPage;
2564 : : } mxtruncinfo;
2565 : :
2566 : : /*
2567 : : * SlruScanDirectory callback
2568 : : * This callback determines the earliest existing page number.
2569 : : */
2570 : : static bool
750 akorotkov@postgresql 2571 : 0 : SlruScanDirCbFindEarliest(SlruCtl ctl, char *filename, int64 segpage, void *data)
2572 : : {
4586 bruce@momjian.us 2573 : 0 : mxtruncinfo *trunc = (mxtruncinfo *) data;
2574 : :
4712 alvherre@alvh.no-ip. 2575 [ # # # # ]: 0 : if (trunc->earliestExistingPage == -1 ||
2576 : 0 : ctl->PagePrecedes(segpage, trunc->earliestExistingPage))
2577 : : {
2578 : 0 : trunc->earliestExistingPage = segpage;
2579 : : }
2580 : :
4586 bruce@momjian.us 2581 : 0 : return false; /* keep going */
2582 : : }
2583 : :
2584 : :
2585 : : /*
2586 : : * Delete members segments [oldest, newOldest)
2587 : : */
2588 : : static void
3736 andres@anarazel.de 2589 : 0 : PerformMembersTruncation(MultiXactOffset oldestOffset, MultiXactOffset newOldestOffset)
2590 : : {
9 heikki.linnakangas@i 2591 :UNC 0 : SimpleLruTruncate(MultiXactMemberCtl,
2592 : : MXOffsetToMemberPage(newOldestOffset));
3736 andres@anarazel.de 2593 :UBC 0 : }
2594 : :
2595 : : /*
2596 : : * Delete offsets segments [oldest, newOldest)
2597 : : */
2598 : : static void
2599 : 0 : PerformOffsetsTruncation(MultiXactId oldestMulti, MultiXactId newOldestMulti)
2600 : : {
2601 : : /*
2602 : : * We step back one multixact to avoid passing a cutoff page that hasn't
2603 : : * been created yet in the rare case that oldestMulti would be the first
2604 : : * item on a page and oldestMulti == nextMulti. In that case, if we
2605 : : * didn't subtract one, we'd trigger SimpleLruTruncate's wraparound
2606 : : * detection.
2607 : : */
2608 : 0 : SimpleLruTruncate(MultiXactOffsetCtl,
2609 : : MultiXactIdToOffsetPage(PreviousMultiXactId(newOldestMulti)));
2610 : 0 : }
2611 : :
2612 : : /*
2613 : : * Remove all MultiXactOffset and MultiXactMember segments before the oldest
2614 : : * ones still of interest.
2615 : : *
2616 : : * This is only called on a primary as part of vacuum (via
2617 : : * vac_truncate_clog()). During recovery truncation is done by replaying
2618 : : * truncation WAL records logged here.
2619 : : *
2620 : : * newOldestMulti is the oldest currently required multixact, newOldestMultiDB
2621 : : * is one of the databases preventing newOldestMulti from increasing.
2622 : : */
2623 : : void
3736 andres@anarazel.de 2624 :CBC 103 : TruncateMultiXact(MultiXactId newOldestMulti, Oid newOldestMultiDB)
2625 : : {
2626 : : MultiXactId oldestMulti;
2627 : : MultiXactId nextMulti;
2628 : : MultiXactOffset newOldestOffset;
2629 : : MultiXactOffset oldestOffset;
2630 : : MultiXactOffset nextOffset;
2631 : : mxtruncinfo trunc;
2632 : : MultiXactId earliest;
2633 : :
2634 [ - + ]: 103 : Assert(!RecoveryInProgress());
2635 [ - + ]: 103 : Assert(MultiXactState->finishedStartup);
6 heikki.linnakangas@i 2636 [ - + ]:GNC 103 : Assert(MultiXactIdIsValid(newOldestMulti));
2637 : :
2638 : : /*
2639 : : * We can only allow one truncation to happen at once. Otherwise parts of
2640 : : * members might vanish while we're doing lookups or similar. There's no
2641 : : * need to have an interlock with creating new multis or such, since those
2642 : : * are constrained by the limits (which only grow, never shrink).
2643 : : */
3736 andres@anarazel.de 2644 :CBC 103 : LWLockAcquire(MultiXactTruncationLock, LW_EXCLUSIVE);
2645 : :
4192 alvherre@alvh.no-ip. 2646 : 103 : LWLockAcquire(MultiXactGenLock, LW_SHARED);
3736 andres@anarazel.de 2647 : 103 : nextMulti = MultiXactState->nextMXact;
3849 rhaas@postgresql.org 2648 : 103 : nextOffset = MultiXactState->nextOffset;
3736 andres@anarazel.de 2649 : 103 : oldestMulti = MultiXactState->oldestMultiXactId;
4192 alvherre@alvh.no-ip. 2650 : 103 : LWLockRelease(MultiXactGenLock);
2651 : :
2652 : : /*
2653 : : * Make sure to only attempt truncation if there are values to truncate
2654 : : * away. In normal processing values shouldn't go backwards, but there are
2655 : : * some corner cases (due to bugs) where that's possible.
2656 : : */
3736 andres@anarazel.de 2657 [ + - ]: 103 : if (MultiXactIdPrecedesOrEquals(newOldestMulti, oldestMulti))
2658 : : {
2659 : 103 : LWLockRelease(MultiXactTruncationLock);
2660 : 103 : return;
2661 : : }
2662 : :
2663 : : /*
2664 : : * Note we can't just plow ahead with the truncation; it's possible that
2665 : : * there are no segments to truncate, which is a problem because we are
2666 : : * going to attempt to read the offsets page to determine where to
2667 : : * truncate the members SLRU. So we first scan the directory to determine
2668 : : * the earliest offsets page number that we can read without error.
2669 : : *
2670 : : * When nextMXact is less than one segment away from multiWrapLimit,
2671 : : * SlruScanDirCbFindEarliest can find some early segment other than the
2672 : : * actual earliest. (MultiXactOffsetPagePrecedes(EARLIEST, LATEST)
2673 : : * returns false, because not all pairs of entries have the same answer.)
2674 : : * That can also arise when an earlier truncation attempt failed unlink()
2675 : : * or returned early from this function. The only consequence is
2676 : : * returning early, which wastes space that we could have liberated.
2677 : : *
2678 : : * NB: It's also possible that the page that oldestMulti is on has already
2679 : : * been truncated away, and we crashed before updating oldestMulti.
2680 : : */
4712 alvherre@alvh.no-ip. 2681 :UBC 0 : trunc.earliestExistingPage = -1;
2682 : 0 : SlruScanDirectory(MultiXactOffsetCtl, SlruScanDirCbFindEarliest, &trunc);
2683 : 0 : earliest = trunc.earliestExistingPage * MULTIXACT_OFFSETS_PER_PAGE;
4192 2684 [ # # ]: 0 : if (earliest < FirstMultiXactId)
2685 : 0 : earliest = FirstMultiXactId;
2686 : :
2687 : : /* If there's nothing to remove, we can bail out early. */
3736 andres@anarazel.de 2688 [ # # ]: 0 : if (MultiXactIdPrecedes(oldestMulti, earliest))
2689 : : {
2690 : 0 : LWLockRelease(MultiXactTruncationLock);
7539 tgl@sss.pgh.pa.us 2691 : 0 : return;
2692 : : }
2693 : :
2694 : : /*
2695 : : * First, compute the safe truncation point for MultiXactMember. This is
2696 : : * the starting offset of the oldest multixact.
2697 : : *
2698 : : * Hopefully, find_multixact_start will always work here, because we've
2699 : : * already checked that it doesn't precede the earliest MultiXact on disk.
2700 : : * But if it fails, don't truncate anything, and log a message.
2701 : : */
3736 andres@anarazel.de 2702 [ # # ]: 0 : if (oldestMulti == nextMulti)
2703 : : {
2704 : : /* there are NO MultiXacts */
2705 : 0 : oldestOffset = nextOffset;
2706 : : }
2707 [ # # ]: 0 : else if (!find_multixact_start(oldestMulti, &oldestOffset))
2708 : : {
3849 rhaas@postgresql.org 2709 [ # # ]: 0 : ereport(LOG,
2710 : : (errmsg("oldest MultiXact %u not found, earliest MultiXact %u, skipping truncation",
2711 : : oldestMulti, earliest)));
3736 andres@anarazel.de 2712 : 0 : LWLockRelease(MultiXactTruncationLock);
3849 rhaas@postgresql.org 2713 : 0 : return;
2714 : : }
2715 : :
2716 : : /*
2717 : : * Second, compute the point up to which to truncate. Look up the
2718 : : * member offset corresponding to newOldestMulti for that.
2719 : : */
3736 andres@anarazel.de 2720 [ # # ]: 0 : if (newOldestMulti == nextMulti)
2721 : : {
2722 : : /* there are NO MultiXacts */
2723 : 0 : newOldestOffset = nextOffset;
2724 : : }
2725 [ # # ]: 0 : else if (!find_multixact_start(newOldestMulti, &newOldestOffset))
2726 : : {
2727 [ # # ]: 0 : ereport(LOG,
2728 : : (errmsg("cannot truncate up to MultiXact %u because it does not exist on disk, skipping truncation",
2729 : : newOldestMulti)));
2730 : 0 : LWLockRelease(MultiXactTruncationLock);
2731 : 0 : return;
2732 : : }
2733 : :
2734 [ # # ]: 0 : elog(DEBUG1, "performing multixact truncation: "
2735 : : "offsets [%u, %u), offsets segments [%" PRIx64 ", %" PRIx64 "), "
2736 : : "members [%" PRIu64 ", %" PRIu64 "), members segments [%" PRIx64 ", %" PRIx64 ")",
2737 : : oldestMulti, newOldestMulti,
2738 : : MultiXactIdToOffsetSegment(oldestMulti),
2739 : : MultiXactIdToOffsetSegment(newOldestMulti),
2740 : : oldestOffset, newOldestOffset,
2741 : : MXOffsetToMemberSegment(oldestOffset),
2742 : : MXOffsetToMemberSegment(newOldestOffset));
2743 : :
2744 : : /*
2745 : : * Do truncation, and the WAL logging of the truncation, in a critical
2746 : : * section. That way offsets/members cannot get out of sync anymore, i.e.
2747 : : * once consistent the newOldestMulti will always exist in members, even
2748 : : * if we crash at the wrong moment.
2749 : : */
2750 : 0 : START_CRIT_SECTION();
2751 : :
2752 : : /*
2753 : : * Prevent checkpoints from being scheduled concurrently. This is critical
2754 : : * because otherwise a truncation record might not be replayed after a
2755 : : * crash/basebackup, even though the state of the data directory would
2756 : : * require it.
2757 : : */
1350 rhaas@postgresql.org 2758 [ # # ]: 0 : Assert((MyProc->delayChkptFlags & DELAY_CHKPT_START) == 0);
2759 : 0 : MyProc->delayChkptFlags |= DELAY_CHKPT_START;
2760 : :
2761 : : /* WAL log truncation */
3736 andres@anarazel.de 2762 : 0 : WriteMTruncateXlogRec(newOldestMultiDB,
2763 : : oldestMulti, newOldestMulti,
2764 : : oldestOffset, newOldestOffset);
2765 : :
2766 : : /*
2767 : : * Update in-memory limits before performing the truncation, while inside
2768 : : * the critical section: Have to do it before truncation, to prevent
2769 : : * concurrent lookups of those values. Has to be inside the critical
2770 : : * section as otherwise a future call to this function would error out,
2771 : : * while looking up the oldest member in offsets, if our caller crashes
2772 : : * before updating the limits.
2773 : : */
2774 : 0 : LWLockAcquire(MultiXactGenLock, LW_EXCLUSIVE);
2775 : 0 : MultiXactState->oldestMultiXactId = newOldestMulti;
2776 : 0 : MultiXactState->oldestMultiXactDB = newOldestMultiDB;
9 heikki.linnakangas@i 2777 :UNC 0 : MultiXactState->oldestOffset = newOldestOffset;
3736 andres@anarazel.de 2778 :UBC 0 : LWLockRelease(MultiXactGenLock);
2779 : :
2780 : : /* First truncate members */
2781 : 0 : PerformMembersTruncation(oldestOffset, newOldestOffset);
2782 : :
2783 : : /* Then offsets */
2784 : 0 : PerformOffsetsTruncation(oldestMulti, newOldestMulti);
2785 : :
1350 rhaas@postgresql.org 2786 : 0 : MyProc->delayChkptFlags &= ~DELAY_CHKPT_START;
2787 : :
3736 andres@anarazel.de 2788 [ # # ]: 0 : END_CRIT_SECTION();
2789 : 0 : LWLockRelease(MultiXactTruncationLock);
2790 : : }
2791 : :
2792 : : /*
2793 : : * Decide whether a MultiXactOffset page number is "older" for truncation
2794 : : * purposes. Analogous to CLOGPagePrecedes().
2795 : : *
2796 : : * Offsetting the values is optional, because MultiXactIdPrecedes() has
2797 : : * translational symmetry.
2798 : : */
2799 : : static bool
750 akorotkov@postgresql 2800 :CBC 41769 : MultiXactOffsetPagePrecedes(int64 page1, int64 page2)
2801 : : {
2802 : : MultiXactId multi1;
2803 : : MultiXactId multi2;
2804 : :
7539 tgl@sss.pgh.pa.us 2805 : 41769 : multi1 = ((MultiXactId) page1) * MULTIXACT_OFFSETS_PER_PAGE;
1797 noah@leadboat.com 2806 : 41769 : multi1 += FirstMultiXactId + 1;
7539 tgl@sss.pgh.pa.us 2807 : 41769 : multi2 = ((MultiXactId) page2) * MULTIXACT_OFFSETS_PER_PAGE;
1797 noah@leadboat.com 2808 : 41769 : multi2 += FirstMultiXactId + 1;
2809 : :
2810 [ + + + + ]: 69615 : return (MultiXactIdPrecedes(multi1, multi2) &&
2811 : 27846 : MultiXactIdPrecedes(multi1,
2812 : : multi2 + MULTIXACT_OFFSETS_PER_PAGE - 1));
2813 : : }
2814 : :
2815 : : /*
2816 : : * Decide whether a MultiXactMember page number is "older" for truncation
2817 : : * purposes. There is no "invalid offset number" and members never wrap
2818 : : * around, so use the numbers verbatim.
2819 : : */
2820 : : static bool
750 akorotkov@postgresql 2821 :UBC 0 : MultiXactMemberPagePrecedes(int64 page1, int64 page2)
2822 : : {
9 heikki.linnakangas@i 2823 :UNC 0 : return page1 < page2;
2824 : : }
2825 : :
2826 : : /*
2827 : : * Decide which of two MultiXactIds is earlier.
2828 : : *
2829 : : * XXX do we need to do something special for InvalidMultiXactId?
2830 : : * (Doesn't look like it.)
2831 : : */
2832 : : bool
7539 tgl@sss.pgh.pa.us 2833 :CBC 852964 : MultiXactIdPrecedes(MultiXactId multi1, MultiXactId multi2)
2834 : : {
7369 bruce@momjian.us 2835 : 852964 : int32 diff = (int32) (multi1 - multi2);
2836 : :
7539 tgl@sss.pgh.pa.us 2837 : 852964 : return (diff < 0);
2838 : : }
2839 : :
2840 : : /*
2841 : : * MultiXactIdPrecedesOrEquals -- is multi1 logically <= multi2?
2842 : : *
2843 : : * XXX do we need to do something special for InvalidMultiXactId?
2844 : : * (Doesn't look like it.)
2845 : : */
2846 : : bool
4403 alvherre@alvh.no-ip. 2847 : 5931 : MultiXactIdPrecedesOrEquals(MultiXactId multi1, MultiXactId multi2)
2848 : : {
2849 : 5931 : int32 diff = (int32) (multi1 - multi2);
2850 : :
2851 : 5931 : return (diff <= 0);
2852 : : }
2853 : :
2854 : :
2855 : : /*
2856 : : * Write a TRUNCATE xlog record
2857 : : *
2858 : : * We must flush the xlog record to disk before returning --- see notes in
2859 : : * TruncateCLOG().
2860 : : */
2861 : : static void
3736 andres@anarazel.de 2862 :UBC 0 : WriteMTruncateXlogRec(Oid oldestMultiDB,
2863 : : MultiXactId startTruncOff, MultiXactId endTruncOff,
2864 : : MultiXactOffset startTruncMemb, MultiXactOffset endTruncMemb)
2865 : : {
2866 : : XLogRecPtr recptr;
2867 : : xl_multixact_truncate xlrec;
2868 : :
2869 : 0 : xlrec.oldestMultiDB = oldestMultiDB;
2870 : :
2871 : 0 : xlrec.startTruncOff = startTruncOff;
2872 : 0 : xlrec.endTruncOff = endTruncOff;
2873 : :
2874 : 0 : xlrec.startTruncMemb = startTruncMemb;
2875 : 0 : xlrec.endTruncMemb = endTruncMemb;
2876 : :
2877 : 0 : XLogBeginInsert();
310 peter@eisentraut.org 2878 : 0 : XLogRegisterData(&xlrec, SizeOfMultiXactTruncate);
3736 andres@anarazel.de 2879 : 0 : recptr = XLogInsert(RM_MULTIXACT_ID, XLOG_MULTIXACT_TRUNCATE_ID);
2880 : 0 : XLogFlush(recptr);
2881 : 0 : }
2882 : :
2883 : : /*
2884 : : * MULTIXACT resource manager's routines
2885 : : */
2886 : : void
4046 heikki.linnakangas@i 2887 :CBC 2 : multixact_redo(XLogReaderState *record)
2888 : : {
2889 : 2 : uint8 info = XLogRecGetInfo(record) & ~XLR_INFO_MASK;
2890 : :
2891 : : /* Backup blocks are not used in multixact records */
2892 [ - + ]: 2 : Assert(!XLogRecHasAnyBlockRefs(record));
2893 : :
7498 tgl@sss.pgh.pa.us 2894 [ - + ]: 2 : if (info == XLOG_MULTIXACT_ZERO_OFF_PAGE)
2895 : : {
2896 : : int64 pageno;
2897 : :
750 akorotkov@postgresql 2898 :UBC 0 : memcpy(&pageno, XLogRecGetData(record), sizeof(pageno));
164 alvherre@kurilemu.de 2899 :UNC 0 : SimpleLruZeroAndWritePage(MultiXactOffsetCtl, pageno);
2900 : : }
7498 tgl@sss.pgh.pa.us 2901 [ - + ]:CBC 2 : else if (info == XLOG_MULTIXACT_ZERO_MEM_PAGE)
2902 : : {
2903 : : int64 pageno;
2904 : :
750 akorotkov@postgresql 2905 :LBC (1) : memcpy(&pageno, XLogRecGetData(record), sizeof(pageno));
164 alvherre@kurilemu.de 2906 :UNC 0 : SimpleLruZeroAndWritePage(MultiXactMemberCtl, pageno);
2907 : : }
7498 tgl@sss.pgh.pa.us 2908 [ + - ]:CBC 2 : else if (info == XLOG_MULTIXACT_CREATE_ID)
2909 : : {
4712 alvherre@alvh.no-ip. 2910 : 2 : xl_multixact_create *xlrec =
944 tgl@sss.pgh.pa.us 2911 : 2 : (xl_multixact_create *) XLogRecGetData(record);
2912 : : TransactionId max_xid;
2913 : : int i;
2914 : :
2915 : : /* Store the data back into the SLRU files */
4712 alvherre@alvh.no-ip. 2916 : 2 : RecordNewMultiXact(xlrec->mid, xlrec->moff, xlrec->nmembers,
2917 : 2 : xlrec->members);
2918 : :
2919 : : /* Make sure nextMXact/nextOffset are beyond what this record has */
6 heikki.linnakangas@i 2920 :GNC 2 : MultiXactAdvanceNextMXact(NextMultiXactId(xlrec->mid),
4712 alvherre@alvh.no-ip. 2921 :CBC 2 : xlrec->moff + xlrec->nmembers);
2922 : :
2923 : : /*
2924 : : * Make sure nextXid is beyond any XID mentioned in the record. This
2925 : : * should be unnecessary, since any XID found here ought to have other
2926 : : * evidence in the XLOG, but let's be safe.
2927 : : */
4046 heikki.linnakangas@i 2928 : 2 : max_xid = XLogRecGetXid(record);
4712 alvherre@alvh.no-ip. 2929 [ + + ]: 6 : for (i = 0; i < xlrec->nmembers; i++)
2930 : : {
2931 [ - + ]: 4 : if (TransactionIdPrecedes(max_xid, xlrec->members[i].xid))
4712 alvherre@alvh.no-ip. 2932 :UBC 0 : max_xid = xlrec->members[i].xid;
2933 : : }
2934 : :
2457 tmunro@postgresql.or 2935 :CBC 2 : AdvanceNextFullTransactionIdPastXid(max_xid);
2936 : : }
3736 andres@anarazel.de 2937 [ # # ]:UBC 0 : else if (info == XLOG_MULTIXACT_TRUNCATE_ID)
2938 : : {
2939 : : xl_multixact_truncate xlrec;
2940 : : int64 pageno;
2941 : :
2942 : 0 : memcpy(&xlrec, XLogRecGetData(record),
2943 : : SizeOfMultiXactTruncate);
2944 : :
2945 [ # # ]: 0 : elog(DEBUG1, "replaying multixact truncation: "
2946 : : "offsets [%u, %u), offsets segments [%" PRIx64 ", %" PRIx64 "), "
2947 : : "members [%" PRIu64 ", %" PRIu64 "), members segments [%" PRIx64 ", %" PRIx64 ")",
2948 : : xlrec.startTruncOff, xlrec.endTruncOff,
2949 : : MultiXactIdToOffsetSegment(xlrec.startTruncOff),
2950 : : MultiXactIdToOffsetSegment(xlrec.endTruncOff),
2951 : : xlrec.startTruncMemb, xlrec.endTruncMemb,
2952 : : MXOffsetToMemberSegment(xlrec.startTruncMemb),
2953 : : MXOffsetToMemberSegment(xlrec.endTruncMemb));
2954 : :
2955 : : /* should not be required, but more than cheap enough */
2956 : 0 : LWLockAcquire(MultiXactTruncationLock, LW_EXCLUSIVE);
2957 : :
2958 : : /*
2959 : : * Advance the horizon values, so they're current at the end of
2960 : : * recovery.
2961 : : */
9 heikki.linnakangas@i 2962 :UNC 0 : SetMultiXactIdLimit(xlrec.endTruncOff, xlrec.oldestMultiDB);
2963 : :
3736 andres@anarazel.de 2964 :UBC 0 : PerformMembersTruncation(xlrec.startTruncMemb, xlrec.endTruncMemb);
2965 : :
2966 : : /*
2967 : : * During XLOG replay, latest_page_number isn't necessarily set up
2968 : : * yet; insert a suitable value to bypass the sanity test in
2969 : : * SimpleLruTruncate.
2970 : : */
2971 : 0 : pageno = MultiXactIdToOffsetPage(xlrec.endTruncOff);
681 alvherre@alvh.no-ip. 2972 : 0 : pg_atomic_write_u64(&MultiXactOffsetCtl->shared->latest_page_number,
2973 : : pageno);
3736 andres@anarazel.de 2974 : 0 : PerformOffsetsTruncation(xlrec.startTruncOff, xlrec.endTruncOff);
2975 : :
2976 : 0 : LWLockRelease(MultiXactTruncationLock);
2977 : : }
2978 : : else
7498 tgl@sss.pgh.pa.us 2979 [ # # ]: 0 : elog(PANIC, "multixact_redo: unknown op code %u", info);
7498 tgl@sss.pgh.pa.us 2980 :CBC 2 : }
2981 : :
2982 : : /*
2983 : : * Entrypoint for sync.c to sync offsets files.
2984 : : */
2985 : : int
1910 tmunro@postgresql.or 2986 :UBC 0 : multixactoffsetssyncfiletag(const FileTag *ftag, char *path)
2987 : : {
2988 : 0 : return SlruSyncFileTag(MultiXactOffsetCtl, ftag, path);
2989 : : }
2990 : :
2991 : : /*
2992 : : * Entrypoint for sync.c to sync members files.
2993 : : */
2994 : : int
2995 : 0 : multixactmemberssyncfiletag(const FileTag *ftag, char *path)
2996 : : {
2997 : 0 : return SlruSyncFileTag(MultiXactMemberCtl, ftag, path);
2998 : : }
|