Age Owner Branch data TLA Line data Source code
1 : : /*-------------------------------------------------------------------------
2 : : *
3 : : * smgr.c
4 : : * public interface routines to storage manager switch.
5 : : *
6 : : * All file system operations on relations dispatch through these routines.
7 : : * An SMgrRelation represents physical on-disk relation files that are open
8 : : * for reading and writing.
9 : : *
10 : : * When a relation is first accessed through the relation cache, the
11 : : * corresponding SMgrRelation entry is opened by calling smgropen(), and the
12 : : * reference is stored in the relation cache entry.
13 : : *
14 : : * Accesses that don't go through the relation cache open the SMgrRelation
15 : : * directly. That includes flushing buffers from the buffer cache, as well as
16 : : * all accesses in auxiliary processes like the checkpointer or the WAL redo
17 : : * in the startup process.
18 : : *
19 : : * Operations like CREATE, DROP, ALTER TABLE also hold SMgrRelation references
20 : : * independent of the relation cache. They need to prepare the physical files
21 : : * before updating the relation cache.
22 : : *
23 : : * There is a hash table that holds all the SMgrRelation entries in the
24 : : * backend. If you call smgropen() twice for the same rel locator, you get a
25 : : * reference to the same SMgrRelation. The reference is valid until the end of
26 : : * transaction. This makes repeated access to the same relation efficient,
27 : : * and allows caching things like the relation size in the SMgrRelation entry.
28 : : *
29 : : * At end of transaction, all SMgrRelation entries that haven't been pinned
30 : : * are removed. An SMgrRelation can hold kernel file system descriptors for
31 : : * the underlying files, and we'd like to close those reasonably soon if the
32 : : * file gets deleted. The SMgrRelation references held by the relcache are
33 : : * pinned to prevent them from being closed.
34 : : *
35 : : * There is another mechanism to close file descriptors early:
36 : : * PROCSIGNAL_BARRIER_SMGRRELEASE. It is a request to immediately close all
37 : : * file descriptors. Upon receiving that signal, the backend closes all file
38 : : * descriptors held open by SMgrRelations, but because it can happen in the
39 : : * middle of a transaction, we cannot destroy the SMgrRelation objects
40 : : * themselves, as there could be pointers to them in active use. See
41 : : * smgrrelease() and smgrreleaseall().
42 : : *
43 : : * NB: We need to hold interrupts across most of the functions in this file,
44 : : * as otherwise interrupt processing, e.g. due to an elog/ereport below ERROR, can
45 : : * trigger procsignal processing, which in turn can trigger
46 : : * smgrreleaseall(). Most of the relevant code is not reentrant. It seems
47 : : * better to put the HOLD_INTERRUPTS()/RESUME_INTERRUPTS() here, instead of
48 : : * trying to push them down to md.c where possible: For one, every smgr
49 : : * implementation would be vulnerable, for another, a good bit of smgr.c code
50 : : * itself is affected too. Eventually we might want a more targeted solution,
51 : : * allowing e.g. a networked smgr implementation to be interrupted, but many
52 : : * other, more complicated, problems would need to be fixed for that to be
53 : : * viable (e.g. smgr.c is often called with interrupts already held).
54 : : *
55 : : * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
56 : : * Portions Copyright (c) 1994, Regents of the University of California
57 : : *
58 : : *
59 : : * IDENTIFICATION
60 : : * src/backend/storage/smgr/smgr.c
61 : : *
62 : : *-------------------------------------------------------------------------
63 : : */
64 : : #include "postgres.h"
65 : :
66 : : #include "access/xlogutils.h"
67 : : #include "lib/ilist.h"
68 : : #include "miscadmin.h"
69 : : #include "storage/aio.h"
70 : : #include "storage/bufmgr.h"
71 : : #include "storage/ipc.h"
72 : : #include "storage/md.h"
73 : : #include "storage/smgr.h"
74 : : #include "utils/hsearch.h"
75 : : #include "utils/inval.h"
76 : :
77 : :
78 : : /*
79 : : * This struct of function pointers defines the API between smgr.c and
80 : : * any individual storage manager module. Note that smgr subfunctions are
81 : : * generally expected to report problems via elog(ERROR). An exception is
82 : : * that smgr_unlink should use elog(WARNING), rather than erroring out,
83 : : * because we normally unlink relations during post-commit/abort cleanup,
84 : : * and so it's too late to raise an error. Also, various conditions that
85 : : * would normally be errors should be allowed during bootstrap and/or WAL
86 : : * recovery --- see comments in md.c for details.
87 : : */
typedef struct f_smgr
{
	/* Process-level hooks; smgrinit()/smgrshutdown() skip them when NULL. */
	void		(*smgr_init) (void);	/* may be NULL */
	void		(*smgr_shutdown) (void);	/* may be NULL */

	/* Per-relation setup and per-fork teardown of implementation state. */
	void		(*smgr_open) (SMgrRelation reln);
	void		(*smgr_close) (SMgrRelation reln, ForkNumber forknum);

	/* Fork creation, existence test and removal. */
	void		(*smgr_create) (SMgrRelation reln, ForkNumber forknum,
								bool isRedo);
	bool		(*smgr_exists) (SMgrRelation reln, ForkNumber forknum);
	/* Takes a locator rather than an SMgrRelation, unlike the other hooks. */
	void		(*smgr_unlink) (RelFileLocatorBackend rlocator, ForkNumber forknum,
								bool isRedo);

	/* Growing a fork: by one supplied block, or by nblocks zeroed blocks. */
	void		(*smgr_extend) (SMgrRelation reln, ForkNumber forknum,
								BlockNumber blocknum, const void *buffer, bool skipFsync);
	void		(*smgr_zeroextend) (SMgrRelation reln, ForkNumber forknum,
									BlockNumber blocknum, int nblocks, bool skipFsync);

	/* Block-level read, write and writeback operations. */
	bool		(*smgr_prefetch) (SMgrRelation reln, ForkNumber forknum,
								  BlockNumber blocknum, int nblocks);
	uint32		(*smgr_maxcombine) (SMgrRelation reln, ForkNumber forknum,
									BlockNumber blocknum);
	void		(*smgr_readv) (SMgrRelation reln, ForkNumber forknum,
							   BlockNumber blocknum,
							   void **buffers, BlockNumber nblocks);
	/* Same arguments as smgr_readv, plus the AIO handle to issue the read on. */
	void		(*smgr_startreadv) (PgAioHandle *ioh,
									SMgrRelation reln, ForkNumber forknum,
									BlockNumber blocknum,
									void **buffers, BlockNumber nblocks);
	void		(*smgr_writev) (SMgrRelation reln, ForkNumber forknum,
								BlockNumber blocknum,
								const void **buffers, BlockNumber nblocks,
								bool skipFsync);
	void		(*smgr_writeback) (SMgrRelation reln, ForkNumber forknum,
								   BlockNumber blocknum, BlockNumber nblocks);

	/* Size query, truncation and sync requests. */
	BlockNumber (*smgr_nblocks) (SMgrRelation reln, ForkNumber forknum);
	void		(*smgr_truncate) (SMgrRelation reln, ForkNumber forknum,
								  BlockNumber old_blocks, BlockNumber nblocks);
	void		(*smgr_immedsync) (SMgrRelation reln, ForkNumber forknum);
	void		(*smgr_registersync) (SMgrRelation reln, ForkNumber forknum);

	/* NOTE(review): presumably returns a file descriptor and sets *off to an offset for blocknum -- confirm against md.c */
	int			(*smgr_fd) (SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, uint32 *off);
} f_smgr;
127 : :
/*
 * Table of storage manager implementations.  The routines in this file
 * index it with SMgrRelationData.smgr_which; smgropen() currently always
 * assigns index 0, the only entry.
 */
static const f_smgr smgrsw[] = {
	/* magnetic disk */
	{
		.smgr_init = mdinit,
		.smgr_shutdown = NULL,	/* no shutdown hook; smgrshutdown() skips NULL */
		.smgr_open = mdopen,
		.smgr_close = mdclose,
		.smgr_create = mdcreate,
		.smgr_exists = mdexists,
		.smgr_unlink = mdunlink,
		.smgr_extend = mdextend,
		.smgr_zeroextend = mdzeroextend,
		.smgr_prefetch = mdprefetch,
		.smgr_maxcombine = mdmaxcombine,
		.smgr_readv = mdreadv,
		.smgr_startreadv = mdstartreadv,
		.smgr_writev = mdwritev,
		.smgr_writeback = mdwriteback,
		.smgr_nblocks = mdnblocks,
		.smgr_truncate = mdtruncate,
		.smgr_immedsync = mdimmedsync,
		.smgr_registersync = mdregistersync,
		.smgr_fd = mdfd,
	}
};

/* Number of entries in smgrsw[]; bounds the loops in smgrinit()/smgrshutdown(). */
static const int NSmgr = lengthof(smgrsw);
155 : :
156 : : /*
157 : : * Each backend has a hashtable that stores all extant SMgrRelation objects.
158 : : * In addition, "unpinned" SMgrRelation objects are chained together in a list.
159 : : */
/*
 * Keyed by RelFileLocatorBackend.  Stays NULL until the first smgropen()
 * call creates it, so routines that may run earlier check for NULL.
 */
static HTAB *SMgrRelationHash = NULL;

/* Initialized by smgropen() at the same time the hash table is created. */
static dlist_head unpinned_relns;

/* local function prototypes */
static void smgrshutdown(int code, Datum arg);
static void smgrdestroy(SMgrRelation reln);

/* callbacks referenced by aio_smgr_target_info */
static void smgr_aio_reopen(PgAioHandle *ioh);
static char *smgr_aio_describe_identity(const PgAioTargetData *sd);
170 : :
171 : :
/*
 * AIO target description for smgr: gives the target its name and supplies
 * the reopen and describe-identity callbacks declared above.
 */
const PgAioTargetInfo aio_smgr_target_info = {
	.name = "smgr",
	.reopen = smgr_aio_reopen,
	.describe_identity = smgr_aio_describe_identity,
};
177 : :
178 : :
179 : : /*
180 : : * smgrinit(), smgrshutdown() -- Initialize or shut down storage
181 : : * managers.
182 : : *
183 : : * Note: smgrinit is called during backend startup (normal or standalone
184 : : * case), *not* during postmaster start. Therefore, any resources created
185 : : * here or destroyed in smgrshutdown are backend-local.
186 : : */
187 : : void
8432 tgl@sss.pgh.pa.us 188 :CBC 18768 : smgrinit(void)
189 : : {
190 : : int i;
191 : :
170 andres@anarazel.de 192 : 18768 : HOLD_INTERRUPTS();
193 : :
10226 bruce@momjian.us 194 [ + + ]: 37536 : for (i = 0; i < NSmgr; i++)
195 : : {
196 [ + - ]: 18768 : if (smgrsw[i].smgr_init)
2921 peter_e@gmx.net 197 : 18768 : smgrsw[i].smgr_init();
198 : : }
199 : :
170 andres@anarazel.de 200 [ - + ]: 18768 : RESUME_INTERRUPTS();
201 : :
202 : : /* register the shutdown proc */
9105 peter_e@gmx.net 203 : 18768 : on_proc_exit(smgrshutdown, 0);
10651 scrappy@hub.org 204 : 18768 : }
205 : :
206 : : /*
207 : : * on_proc_exit hook for smgr cleanup during backend shutdown
208 : : */
209 : : static void
7939 peter_e@gmx.net 210 : 18768 : smgrshutdown(int code, Datum arg)
211 : : {
212 : : int i;
213 : :
170 andres@anarazel.de 214 : 18768 : HOLD_INTERRUPTS();
215 : :
10226 bruce@momjian.us 216 [ + + ]: 37536 : for (i = 0; i < NSmgr; i++)
217 : : {
218 [ - + ]: 18768 : if (smgrsw[i].smgr_shutdown)
2921 peter_e@gmx.net 219 :UBC 0 : smgrsw[i].smgr_shutdown();
220 : : }
221 : :
170 andres@anarazel.de 222 [ - + ]:CBC 18768 : RESUME_INTERRUPTS();
10651 scrappy@hub.org 223 : 18768 : }
224 : :
225 : : /*
226 : : * smgropen() -- Return an SMgrRelation object, creating it if need be.
227 : : *
228 : : * In versions of PostgreSQL prior to 17, this function returned an object
229 : : * with no defined lifetime. Now, however, the object remains valid for the
230 : : * lifetime of the transaction, up to the point where AtEOXact_SMgr() is
231 : : * called, making it much easier for callers to know for how long they can
232 : : * hold on to a pointer to the returned object. If this function is called
233 : : * outside of a transaction, the object remains valid until smgrdestroy() or
234 : : * smgrdestroyall() is called. Background processes that use smgr but not
235 : : * transactions typically do this once per checkpoint cycle.
236 : : *
237 : : * This does not attempt to actually open the underlying files.
238 : : */
239 : : SMgrRelation
552 heikki.linnakangas@i 240 : 13517685 : smgropen(RelFileLocator rlocator, ProcNumber backend)
241 : : {
242 : : RelFileLocatorBackend brlocator;
243 : : SMgrRelation reln;
244 : : bool found;
245 : :
638 246 [ - + ]: 13517685 : Assert(RelFileNumberIsValid(rlocator.relNumber));
247 : :
170 andres@anarazel.de 248 : 13517685 : HOLD_INTERRUPTS();
249 : :
7879 tgl@sss.pgh.pa.us 250 [ + + ]: 13517685 : if (SMgrRelationHash == NULL)
251 : : {
252 : : /* First time through: initialize the hash table */
253 : : HASHCTL ctl;
254 : :
1074 rhaas@postgresql.org 255 : 16401 : ctl.keysize = sizeof(RelFileLocatorBackend);
7879 tgl@sss.pgh.pa.us 256 : 16401 : ctl.entrysize = sizeof(SMgrRelationData);
257 : 16401 : SMgrRelationHash = hash_create("smgr relation table", 400,
258 : : &ctl, HASH_ELEM | HASH_BLOBS);
584 heikki.linnakangas@i 259 : 16401 : dlist_init(&unpinned_relns);
260 : : }
261 : :
262 : : /* Look up or create an entry */
1158 rhaas@postgresql.org 263 : 13517685 : brlocator.locator = rlocator;
264 : 13517685 : brlocator.backend = backend;
7879 tgl@sss.pgh.pa.us 265 : 13517685 : reln = (SMgrRelation) hash_search(SMgrRelationHash,
266 : : &brlocator,
267 : : HASH_ENTER, &found);
268 : :
269 : : /* Initialize it if not present before */
270 [ + + ]: 13517685 : if (!found)
271 : : {
272 : : /* hash_search already filled in the lookup key */
5688 273 : 947375 : reln->smgr_targblock = InvalidBlockNumber;
1863 tmunro@postgresql.or 274 [ + + ]: 4736875 : for (int i = 0; i <= MAX_FORKNUM; ++i)
275 : 3789500 : reln->smgr_cached_nblocks[i] = InvalidBlockNumber;
7879 tgl@sss.pgh.pa.us 276 : 947375 : reln->smgr_which = 0; /* we only have md.c at present */
277 : :
278 : : /* it is not pinned yet */
584 heikki.linnakangas@i 279 : 947375 : reln->pincount = 0;
280 : 947375 : dlist_push_tail(&unpinned_relns, &reln->node);
281 : :
282 : : /* implementation-specific initialization */
172 andres@anarazel.de 283 : 947375 : smgrsw[reln->smgr_which].smgr_open(reln);
284 : : }
285 : :
170 286 [ - + ]: 13517685 : RESUME_INTERRUPTS();
287 : :
7879 tgl@sss.pgh.pa.us 288 : 13517685 : return reln;
289 : : }
290 : :
291 : : /*
292 : : * smgrpin() -- Prevent an SMgrRelation object from being destroyed at end of
293 : : * transaction
294 : : */
295 : : void
584 heikki.linnakangas@i 296 : 771728 : smgrpin(SMgrRelation reln)
297 : : {
298 [ + - ]: 771728 : if (reln->pincount == 0)
2355 tomas.vondra@postgre 299 : 771728 : dlist_delete(&reln->node);
584 heikki.linnakangas@i 300 : 771728 : reln->pincount++;
7544 tgl@sss.pgh.pa.us 301 : 771728 : }
302 : :
303 : : /*
304 : : * smgrunpin() -- Allow an SMgrRelation object to be destroyed at end of
305 : : * transaction
306 : : *
307 : : * The object remains valid, but if there are no other pins on it, it is moved
308 : : * to the unpinned list where it will be destroyed by AtEOXact_SMgr().
309 : : */
310 : : void
584 heikki.linnakangas@i 311 : 198189 : smgrunpin(SMgrRelation reln)
312 : : {
313 [ - + ]: 198189 : Assert(reln->pincount > 0);
314 : 198189 : reln->pincount--;
315 [ + - ]: 198189 : if (reln->pincount == 0)
316 : 198189 : dlist_push_tail(&unpinned_relns, &reln->node);
6235 317 : 198189 : }
318 : :
319 : : /*
320 : : * smgrdestroy() -- Delete an SMgrRelation object.
321 : : */
322 : : static void
584 323 : 295715 : smgrdestroy(SMgrRelation reln)
324 : : {
325 : : ForkNumber forknum;
326 : :
327 [ - + ]: 295715 : Assert(reln->pincount == 0);
328 : :
170 andres@anarazel.de 329 : 295715 : HOLD_INTERRUPTS();
330 : :
6235 heikki.linnakangas@i 331 [ + + ]: 1478575 : for (forknum = 0; forknum <= MAX_FORKNUM; forknum++)
2921 peter_e@gmx.net 332 : 1182860 : smgrsw[reln->smgr_which].smgr_close(reln, forknum);
333 : :
584 heikki.linnakangas@i 334 : 295715 : dlist_delete(&reln->node);
335 : :
7879 tgl@sss.pgh.pa.us 336 [ - + ]: 295715 : if (hash_search(SMgrRelationHash,
943 peter@eisentraut.org 337 : 295715 : &(reln->smgr_rlocator),
338 : : HASH_REMOVE, NULL) == NULL)
7879 tgl@sss.pgh.pa.us 339 [ # # ]:UBC 0 : elog(ERROR, "SMgrRelation hashtable corrupted");
340 : :
170 andres@anarazel.de 341 [ - + ]:CBC 295715 : RESUME_INTERRUPTS();
7879 tgl@sss.pgh.pa.us 342 : 295715 : }
343 : :
344 : : /*
345 : : * smgrrelease() -- Release all resources used by this object.
346 : : *
347 : : * The object remains valid.
348 : : */
349 : : void
1218 tmunro@postgresql.or 350 : 388589 : smgrrelease(SMgrRelation reln)
351 : : {
170 andres@anarazel.de 352 : 388589 : HOLD_INTERRUPTS();
353 : :
1218 tmunro@postgresql.or 354 [ + + ]: 1942945 : for (ForkNumber forknum = 0; forknum <= MAX_FORKNUM; forknum++)
355 : : {
356 : 1554356 : smgrsw[reln->smgr_which].smgr_close(reln, forknum);
357 : 1554356 : reln->smgr_cached_nblocks[forknum] = InvalidBlockNumber;
358 : : }
751 359 : 388589 : reln->smgr_targblock = InvalidBlockNumber;
360 : :
170 andres@anarazel.de 361 [ - + ]: 388589 : RESUME_INTERRUPTS();
1218 tmunro@postgresql.or 362 : 388589 : }
363 : :
364 : : /*
365 : : * smgrclose() -- Close an SMgrRelation object.
366 : : *
367 : : * The SMgrRelation reference should not be used after this call. However,
368 : : * because we don't keep track of the references returned by smgropen(), we
369 : : * don't know if there are other references still pointing to the same object,
370 : : * so we cannot remove the SMgrRelation object yet. Therefore, this is just a
371 : : * synonym for smgrrelease() at the moment.
372 : : */
373 : : void
584 heikki.linnakangas@i 374 : 279718 : smgrclose(SMgrRelation reln)
375 : : {
376 : 279718 : smgrrelease(reln);
377 : 279718 : }
378 : :
379 : : /*
380 : : * smgrdestroyall() -- Release resources used by all unpinned objects.
381 : : *
382 : : * It must be known that there are no pointers to SMgrRelations, other than
383 : : * those pinned with smgrpin().
384 : : */
385 : : void
386 : 320628 : smgrdestroyall(void)
387 : : {
388 : : dlist_mutable_iter iter;
389 : :
390 : : /* seems unsafe to accept interrupts while in a dlist_foreach_modify() */
170 andres@anarazel.de 391 : 320628 : HOLD_INTERRUPTS();
392 : :
393 : : /*
394 : : * Zap all unpinned SMgrRelations. We rely on smgrdestroy() to remove
395 : : * each one from the list.
396 : : */
584 heikki.linnakangas@i 397 [ + + + + ]: 616343 : dlist_foreach_modify(iter, &unpinned_relns)
398 : : {
399 : 295715 : SMgrRelation rel = dlist_container(SMgrRelationData, node,
400 : : iter.cur);
401 : :
402 : 295715 : smgrdestroy(rel);
403 : : }
404 : :
170 andres@anarazel.de 405 [ - + ]: 320628 : RESUME_INTERRUPTS();
1218 tmunro@postgresql.or 406 : 320628 : }
407 : :
408 : : /*
409 : : * smgrreleaseall() -- Release resources used by all objects.
410 : : */
411 : : void
584 heikki.linnakangas@i 412 : 2901 : smgrreleaseall(void)
413 : : {
414 : : HASH_SEQ_STATUS status;
415 : : SMgrRelation reln;
416 : :
417 : : /* Nothing to do if hashtable not set up */
7879 tgl@sss.pgh.pa.us 418 [ + + ]: 2901 : if (SMgrRelationHash == NULL)
419 : 148 : return;
420 : :
421 : : /* seems unsafe to accept interrupts while iterating */
170 andres@anarazel.de 422 : 2753 : HOLD_INTERRUPTS();
423 : :
7879 tgl@sss.pgh.pa.us 424 : 2753 : hash_seq_init(&status, SMgrRelationHash);
425 : :
426 [ + + ]: 99542 : while ((reln = (SMgrRelation) hash_seq_search(&status)) != NULL)
427 : : {
584 heikki.linnakangas@i 428 : 96789 : smgrrelease(reln);
429 : : }
430 : :
170 andres@anarazel.de 431 [ - + ]: 2753 : RESUME_INTERRUPTS();
432 : : }
433 : :
434 : : /*
435 : : * smgrreleaserellocator() -- Release resources for given RelFileLocator, if
436 : : * it's open.
437 : : *
438 : : * This has the same effects as smgrrelease(smgropen(rlocator)), but avoids
439 : : * uselessly creating a hashtable entry only to drop it again when no
440 : : * such entry exists already.
441 : : */
442 : : void
584 heikki.linnakangas@i 443 : 233477 : smgrreleaserellocator(RelFileLocatorBackend rlocator)
444 : : {
445 : : SMgrRelation reln;
446 : :
447 : : /* Nothing to do if hashtable not set up */
7879 tgl@sss.pgh.pa.us 448 [ + + ]: 233477 : if (SMgrRelationHash == NULL)
449 : 10 : return;
450 : :
451 : 233467 : reln = (SMgrRelation) hash_search(SMgrRelationHash,
452 : : &rlocator,
453 : : HASH_FIND, NULL);
454 [ + + ]: 233467 : if (reln != NULL)
584 heikki.linnakangas@i 455 : 12082 : smgrrelease(reln);
456 : : }
457 : :
458 : : /*
459 : : * smgrexists() -- Does the underlying file for a fork exist?
460 : : */
461 : : bool
462 : 499205 : smgrexists(SMgrRelation reln, ForkNumber forknum)
463 : : {
464 : : bool ret;
465 : :
170 andres@anarazel.de 466 : 499205 : HOLD_INTERRUPTS();
467 : 499205 : ret = smgrsw[reln->smgr_which].smgr_exists(reln, forknum);
468 [ - + ]: 499205 : RESUME_INTERRUPTS();
469 : :
470 : 499205 : return ret;
471 : : }
472 : :
473 : : /*
474 : : * smgrcreate() -- Create a new relation.
475 : : *
476 : : * Given an already-created (but presumably unused) SMgrRelation,
477 : : * cause the underlying disk file or other storage for the fork
478 : : * to be created.
479 : : */
480 : : void
6135 heikki.linnakangas@i 481 : 5641912 : smgrcreate(SMgrRelation reln, ForkNumber forknum, bool isRedo)
482 : : {
170 andres@anarazel.de 483 : 5641912 : HOLD_INTERRUPTS();
2921 peter_e@gmx.net 484 : 5641912 : smgrsw[reln->smgr_which].smgr_create(reln, forknum, isRedo);
170 andres@anarazel.de 485 [ - + ]: 5641912 : RESUME_INTERRUPTS();
10651 scrappy@hub.org 486 : 5641912 : }
487 : :
488 : : /*
489 : : * smgrdosyncall() -- Immediately sync all forks of all given relations
490 : : *
491 : : * All forks of all given relations are synced out to the store.
492 : : *
493 : : * This is equivalent to FlushRelationBuffers() for each smgr relation,
494 : : * then calling smgrimmedsync() for all forks of each relation, but it's
495 : : * significantly quicker so should be preferred when possible.
496 : : */
497 : : void
1981 noah@leadboat.com 498 : 13 : smgrdosyncall(SMgrRelation *rels, int nrels)
499 : : {
500 : 13 : int i = 0;
501 : : ForkNumber forknum;
502 : :
503 [ - + ]: 13 : if (nrels == 0)
1981 noah@leadboat.com 504 :UBC 0 : return;
505 : :
1981 noah@leadboat.com 506 :CBC 13 : FlushRelationsAllBuffers(rels, nrels);
507 : :
170 andres@anarazel.de 508 : 13 : HOLD_INTERRUPTS();
509 : :
510 : : /*
511 : : * Sync the physical file(s).
512 : : */
1981 noah@leadboat.com 513 [ + + ]: 26 : for (i = 0; i < nrels; i++)
514 : : {
515 : 13 : int which = rels[i]->smgr_which;
516 : :
517 [ + + ]: 65 : for (forknum = 0; forknum <= MAX_FORKNUM; forknum++)
518 : : {
519 [ + + ]: 52 : if (smgrsw[which].smgr_exists(rels[i], forknum))
520 : 17 : smgrsw[which].smgr_immedsync(rels[i], forknum);
521 : : }
522 : : }
523 : :
170 andres@anarazel.de 524 [ - + ]: 13 : RESUME_INTERRUPTS();
525 : : }
526 : :
527 : : /*
528 : : * smgrdounlinkall() -- Immediately unlink all forks of all given relations
529 : : *
530 : : * All forks of all given relations are removed from the store. This
531 : : * should not be used during transactional operations, since it can't be
532 : : * undone.
533 : : *
534 : : * If isRedo is true, it is okay for the underlying file(s) to be gone
535 : : * already.
536 : : */
537 : : void
4615 alvherre@alvh.no-ip. 538 : 13622 : smgrdounlinkall(SMgrRelation *rels, int nrels, bool isRedo)
539 : : {
4483 bruce@momjian.us 540 : 13622 : int i = 0;
541 : : RelFileLocatorBackend *rlocators;
542 : : ForkNumber forknum;
543 : :
4615 alvherre@alvh.no-ip. 544 [ + + ]: 13622 : if (nrels == 0)
545 : 286 : return;
546 : :
547 : : /*
548 : : * It would be unsafe to process interrupts between DropRelationBuffers()
549 : : * and unlinking the underlying files. This probably should be a critical
550 : : * section, but we're not there yet.
551 : : */
170 andres@anarazel.de 552 : 13336 : HOLD_INTERRUPTS();
553 : :
554 : : /*
555 : : * Get rid of any remaining buffers for the relations. bufmgr will just
556 : : * drop them without bothering to write the contents.
557 : : */
1152 rhaas@postgresql.org 558 : 13336 : DropRelationsAllBuffers(rels, nrels);
559 : :
560 : : /*
561 : : * create an array which contains all relations to be dropped, and close
562 : : * each relation's forks at the smgr level while at it
563 : : */
1158 564 : 13336 : rlocators = palloc(sizeof(RelFileLocatorBackend) * nrels);
4615 alvherre@alvh.no-ip. 565 [ + + ]: 58532 : for (i = 0; i < nrels; i++)
566 : : {
1158 rhaas@postgresql.org 567 : 45196 : RelFileLocatorBackend rlocator = rels[i]->smgr_rlocator;
4615 alvherre@alvh.no-ip. 568 : 45196 : int which = rels[i]->smgr_which;
569 : :
1158 rhaas@postgresql.org 570 : 45196 : rlocators[i] = rlocator;
571 : :
572 : : /* Close the forks at smgr level */
4615 alvherre@alvh.no-ip. 573 [ + + ]: 225980 : for (forknum = 0; forknum <= MAX_FORKNUM; forknum++)
2921 peter_e@gmx.net 574 : 180784 : smgrsw[which].smgr_close(rels[i], forknum);
575 : : }
576 : :
577 : : /*
578 : : * Send a shared-inval message to force other backends to close any
579 : : * dangling smgr references they may have for these rels. We should do
580 : : * this before starting the actual unlinking, in case we fail partway
581 : : * through that step. Note that the sinval messages will eventually come
582 : : * back to this backend, too, and thereby provide a backstop that we
583 : : * closed our own smgr rel.
584 : : */
4615 alvherre@alvh.no-ip. 585 [ + + ]: 58532 : for (i = 0; i < nrels; i++)
1158 rhaas@postgresql.org 586 : 45196 : CacheInvalidateSmgr(rlocators[i]);
587 : :
588 : : /*
589 : : * Delete the physical file(s).
590 : : *
591 : : * Note: smgr_unlink must treat deletion failure as a WARNING, not an
592 : : * ERROR, because we've already decided to commit or abort the current
593 : : * xact.
594 : : */
595 : :
4615 alvherre@alvh.no-ip. 596 [ + + ]: 58532 : for (i = 0; i < nrels; i++)
597 : : {
4483 bruce@momjian.us 598 : 45196 : int which = rels[i]->smgr_which;
599 : :
4615 alvherre@alvh.no-ip. 600 [ + + ]: 225980 : for (forknum = 0; forknum <= MAX_FORKNUM; forknum++)
1158 rhaas@postgresql.org 601 : 180784 : smgrsw[which].smgr_unlink(rlocators[i], forknum, isRedo);
602 : : }
603 : :
604 : 13336 : pfree(rlocators);
605 : :
170 andres@anarazel.de 606 [ - + ]: 13336 : RESUME_INTERRUPTS();
607 : : }
608 : :
609 : :
610 : : /*
611 : : * smgrextend() -- Add a new block to a file.
612 : : *
613 : : * The semantics are nearly the same as smgrwrite(): write at the
614 : : * specified position. However, this is to be used for the case of
615 : : * extending a relation (i.e., blocknum is at or beyond the current
616 : : * EOF). Note that we assume writing a block beyond current EOF
617 : : * causes intervening file space to become filled with zeroes.
618 : : */
619 : : void
5931 bruce@momjian.us 620 : 117525 : smgrextend(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
621 : : const void *buffer, bool skipFsync)
622 : : {
170 andres@anarazel.de 623 : 117525 : HOLD_INTERRUPTS();
624 : :
2921 peter_e@gmx.net 625 : 117525 : smgrsw[reln->smgr_which].smgr_extend(reln, forknum, blocknum,
626 : : buffer, skipFsync);
627 : :
628 : : /*
629 : : * Normally we expect this to increase nblocks by one, but if the cached
630 : : * value isn't as expected, just invalidate it so the next call asks the
631 : : * kernel.
632 : : */
1863 tmunro@postgresql.or 633 [ + + ]: 117525 : if (reln->smgr_cached_nblocks[forknum] == blocknum)
634 : 58673 : reln->smgr_cached_nblocks[forknum] = blocknum + 1;
635 : : else
636 : 58852 : reln->smgr_cached_nblocks[forknum] = InvalidBlockNumber;
637 : :
170 andres@anarazel.de 638 [ - + ]: 117525 : RESUME_INTERRUPTS();
10651 scrappy@hub.org 639 : 117525 : }
640 : :
641 : : /*
642 : : * smgrzeroextend() -- Add new zeroed out blocks to a file.
643 : : *
644 : : * Similar to smgrextend(), except the relation can be extended by
645 : : * multiple blocks at once and the added blocks will be filled with
646 : : * zeroes.
647 : : */
648 : : void
885 andres@anarazel.de 649 : 207816 : smgrzeroextend(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
650 : : int nblocks, bool skipFsync)
651 : : {
170 652 : 207816 : HOLD_INTERRUPTS();
653 : :
885 654 : 207816 : smgrsw[reln->smgr_which].smgr_zeroextend(reln, forknum, blocknum,
655 : : nblocks, skipFsync);
656 : :
657 : : /*
658 : : * Normally we expect this to increase the fork size by nblocks, but if
659 : : * the cached value isn't as expected, just invalidate it so the next call
660 : : * asks the kernel.
661 : : */
662 [ + - ]: 207816 : if (reln->smgr_cached_nblocks[forknum] == blocknum)
663 : 207816 : reln->smgr_cached_nblocks[forknum] = blocknum + nblocks;
664 : : else
885 andres@anarazel.de 665 :UBC 0 : reln->smgr_cached_nblocks[forknum] = InvalidBlockNumber;
666 : :
170 andres@anarazel.de 667 [ - + ]:CBC 207816 : RESUME_INTERRUPTS();
885 668 : 207816 : }
669 : :
670 : : /*
671 : : * smgrprefetch() -- Initiate asynchronous read of the specified blocks of a relation.
672 : : *
673 : : * In recovery only, this can return false to indicate that a file
674 : : * doesn't exist (presumably it has been dropped by a later WAL
675 : : * record).
676 : : */
677 : : bool
630 tmunro@postgresql.or 678 : 8006 : smgrprefetch(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
679 : : int nblocks)
680 : : {
681 : : bool ret;
682 : :
170 andres@anarazel.de 683 : 8006 : HOLD_INTERRUPTS();
684 : 8006 : ret = smgrsw[reln->smgr_which].smgr_prefetch(reln, forknum, blocknum, nblocks);
685 [ - + ]: 8006 : RESUME_INTERRUPTS();
686 : :
687 : 8006 : return ret;
688 : : }
689 : :
690 : : /*
691 : : * smgrmaxcombine() - Return the maximum number of total blocks that can be
692 : : * combined with an IO starting at blocknum.
693 : : *
694 : : * The returned value includes the IO for blocknum itself.
695 : : */
696 : : uint32
333 697 : 31935 : smgrmaxcombine(SMgrRelation reln, ForkNumber forknum,
698 : : BlockNumber blocknum)
699 : : {
700 : : uint32 ret;
701 : :
170 702 : 31935 : HOLD_INTERRUPTS();
703 : 31935 : ret = smgrsw[reln->smgr_which].smgr_maxcombine(reln, forknum, blocknum);
704 [ - + ]: 31935 : RESUME_INTERRUPTS();
705 : :
706 : 31935 : return ret;
707 : : }
708 : :
709 : : /*
710 : : * smgrreadv() -- read a particular block range from a relation into the
711 : : * supplied buffers.
712 : : *
713 : : * This routine is called from the buffer manager in order to
714 : : * instantiate pages in the shared buffer cache. All storage managers
715 : : * return pages in the format that POSTGRES expects.
716 : : *
717 : : * If more than one block is intended to be read, callers need to use
718 : : * smgrmaxcombine() to check how many blocks can be combined into one IO.
719 : : */
720 : : void
628 tmunro@postgresql.or 721 : 598 : smgrreadv(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
722 : : void **buffers, BlockNumber nblocks)
723 : : {
170 andres@anarazel.de 724 : 598 : HOLD_INTERRUPTS();
628 tmunro@postgresql.or 725 : 598 : smgrsw[reln->smgr_which].smgr_readv(reln, forknum, blocknum, buffers,
726 : : nblocks);
170 andres@anarazel.de 727 [ - + ]: 598 : RESUME_INTERRUPTS();
10651 scrappy@hub.org 728 : 598 : }
729 : :
730 : : /*
731 : : * smgrstartreadv() -- asynchronous version of smgrreadv()
732 : : *
733 : : * This starts an asynchronous readv IO using the IO handle `ioh`. Other than
734 : : * `ioh` all parameters are the same as smgrreadv().
735 : : *
736 : : * Completion callbacks above smgr will be passed the result as the number of
737 : : * successfully read blocks if the read [partially] succeeds (Buffers for
738 : : * blocks not successfully read might bear unspecified modifications, up to
739 : : * the full nblocks). This maintains the abstraction that smgr operates on the
740 : : * level of blocks, rather than bytes.
741 : : *
742 : : * Compared to smgrreadv(), more responsibilities fall on the caller:
743 : : * - Partial reads need to be handled by the caller re-issuing IO for the
744 : : * unread blocks
745 : : * - smgr will ereport(LOG_SERVER_ONLY) some problems, but higher layers are
746 : : * responsible for pgaio_result_report() to mirror that news to the user (if
747 : : * the IO results in PGAIO_RS_WARNING) or abort the (sub)transaction (if
748 : : * PGAIO_RS_ERROR).
749 : : * - Under Valgrind, the "buffers" memory may or may not change status to
750 : : * DEFINED, depending on io_method and concurrent activity.
751 : : */
752 : : void
161 andres@anarazel.de 753 : 1245727 : smgrstartreadv(PgAioHandle *ioh,
754 : : SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
755 : : void **buffers, BlockNumber nblocks)
756 : : {
757 : 1245727 : HOLD_INTERRUPTS();
758 : 1245727 : smgrsw[reln->smgr_which].smgr_startreadv(ioh,
759 : : reln, forknum, blocknum, buffers,
760 : : nblocks);
761 [ - + ]: 1245712 : RESUME_INTERRUPTS();
762 : 1245712 : }
763 : :
764 : : /*
765 : : * smgrwritev() -- Write the supplied buffers out.
766 : : *
767 : : * This is to be used only for updating already-existing blocks of a
768 : : * relation (ie, those before the current EOF). To extend a relation,
769 : : * use smgrextend().
770 : : *
771 : : * This is not a synchronous write -- the block is not necessarily
772 : : * on disk at return, only dumped out to the kernel. However,
773 : : * provisions will be made to fsync the write before the next checkpoint.
774 : : *
775 : : * NB: The mechanism to ensure fsync at next checkpoint assumes that there is
776 : : * something that prevents a concurrent checkpoint from "racing ahead" of the
777 : : * write. One way to prevent that is by holding a lock on the buffer; the
778 : : * buffer manager's writes are protected by that. The bulk writer facility
779 : : * in bulk_write.c checks the redo pointer and calls smgrimmedsync() if a
780 : : * checkpoint happened; that relies on the fact that no other backend can be
781 : : * concurrently modifying the page.
782 : : *
783 : : * skipFsync indicates that the caller will make other provisions to
784 : : * fsync the relation, so we needn't bother. Temporary relations also
785 : : * do not require fsync.
786 : : *
787 : : * If more than one block is intended to be written, callers need to use
788 : : * smgrmaxcombine() to check how many blocks can be combined into one IO.
    : : *
    : : * The request is forwarded unchanged to the smgr_writev callback selected
    : : * by reln->smgr_which; interrupts are held while that callback runs.
789 : : */
790 : : void
628 tmunro@postgresql.or 791 : 557130 : smgrwritev(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
792 : : const void **buffers, BlockNumber nblocks, bool skipFsync)
793 : : {
170 andres@anarazel.de 794 : 557130 : HOLD_INTERRUPTS();
628 tmunro@postgresql.or 795 : 557130 : smgrsw[reln->smgr_which].smgr_writev(reln, forknum, blocknum,
796 : : buffers, nblocks, skipFsync);
170 andres@anarazel.de 797 [ - + ]: 557130 : RESUME_INTERRUPTS();
9091 vadim4o@yahoo.com 798 : 557130 : }
799 : :
800 : : /*
801 : : * smgrwriteback() -- Trigger kernel writeback for the supplied range of
802 : : * blocks.
    : : *
    : : * The arguments are passed through unchanged to the smgr_writeback callback
    : : * selected by reln->smgr_which; interrupts are held while that callback
    : : * runs.
803 : : */
804 : : void
3487 andres@anarazel.de 805 :UBC 0 : smgrwriteback(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
806 : : BlockNumber nblocks)
807 : : {
170 808 : 0 : HOLD_INTERRUPTS();
2921 peter_e@gmx.net 809 : 0 : smgrsw[reln->smgr_which].smgr_writeback(reln, forknum, blocknum,
810 : : nblocks);
170 andres@anarazel.de 811 [ # # ]: 0 : RESUME_INTERRUPTS();
3487 812 : 0 : }
813 : :
814 : : /*
815 : : * smgrnblocks() -- Calculate the number of blocks in the
816 : : * supplied relation.
    : : *
    : : * If smgrnblocks_cached() yields a valid value, that value is returned
    : : * without consulting the storage manager implementation. Otherwise the
    : : * smgr_nblocks callback is invoked with interrupts held, and its result is
    : : * stored in reln->smgr_cached_nblocks[forknum] before being returned.
817 : : */
818 : : BlockNumber
6235 heikki.linnakangas@i 819 :CBC 7672982 : smgrnblocks(SMgrRelation reln, ForkNumber forknum)
820 : : {
821 : : BlockNumber result;
822 : :
823 : : /* Use the cached number of blocks if one is available. */
1698 akapila@postgresql.o 824 : 7672982 : result = smgrnblocks_cached(reln, forknum);
825 [ + + ]: 7672982 : if (result != InvalidBlockNumber)
826 : 5572044 : return result;
827 : :
170 andres@anarazel.de 828 : 2100938 : HOLD_INTERRUPTS();
829 : :
1698 akapila@postgresql.o 830 : 2100938 : result = smgrsw[reln->smgr_which].smgr_nblocks(reln, forknum);
831 : :
832 : 2100919 : reln->smgr_cached_nblocks[forknum] = result;
833 : :
170 andres@anarazel.de 834 [ - + ]: 2100919 : RESUME_INTERRUPTS();
835 : :
1698 akapila@postgresql.o 836 : 2100919 : return result;
837 : : }
838 : :
839 : : /*
840 : : * smgrnblocks_cached() -- Get the cached number of blocks in the supplied
841 : : * relation.
842 : : *
843 : : * Returns InvalidBlockNumber when not in recovery, or when the relation
844 : : * fork size is not cached.
845 : : */
846 : : BlockNumber
847 : 7693734 : smgrnblocks_cached(SMgrRelation reln, ForkNumber forknum)
848 : : {
849 : : /*
850 : : * For now, this function uses cached values only in recovery due to lack
851 : : * of a shared invalidation mechanism for changes in file size. Code
852 : : * elsewhere reads smgr_cached_nblocks and copes with stale data.
853 : : */
1863 tmunro@postgresql.or 854 [ + + + + ]: 7693734 : if (InRecovery && reln->smgr_cached_nblocks[forknum] != InvalidBlockNumber)
855 : 5574528 : return reln->smgr_cached_nblocks[forknum];
856 : :
1698 akapila@postgresql.o 857 : 2119206 : return InvalidBlockNumber;
858 : : }
859 : :
860 : : /*
861 : : * smgrtruncate() -- Truncate the given forks of the supplied relation to
862 : : * their respectively specified numbers of blocks
863 : : *
864 : : * The truncation is done immediately, so this can't be rolled back.
865 : : *
866 : : * The caller must hold AccessExclusiveLock on the relation, to ensure that
867 : : * other backends receive the smgr invalidation event that this function sends
868 : : * before they access any forks of the relation again. The current size of
869 : : * the forks should be provided in old_nblocks. This function should normally
870 : : * be called in a critical section, but the current size must be checked
871 : : * outside the critical section, and no interrupts or smgr functions relating
872 : : * to this relation should be called in between.
    : : *
    : : * forknum, old_nblocks and nblocks are parallel arrays; entries 0 through
    : : * nforks - 1 of each are read, one fork per index.
873 : : */
874 : : void
260 tmunro@postgresql.or 875 : 627 : smgrtruncate(SMgrRelation reln, ForkNumber *forknum, int nforks,
876 : : BlockNumber *old_nblocks, BlockNumber *nblocks)
877 : : {
878 : : int i;
879 : :
880 : : /*
881 : : * Get rid of any buffers for the about-to-be-deleted blocks. bufmgr will
882 : : * just drop them without bothering to write the contents.
883 : : */
1152 rhaas@postgresql.org 884 : 627 : DropRelationBuffers(reln, forknum, nforks, nblocks);
885 : :
886 : : /*
887 : : * Send a shared-inval message to force other backends to close any smgr
888 : : * references they may have for this rel. This is useful because they
889 : : * might have open file pointers to segments that got removed, and/or
890 : : * smgr_targblock variables pointing past the new rel end. (The inval
891 : : * message will come back to our backend, too, causing a
892 : : * probably-unnecessary local smgr flush. But we don't expect that this
893 : : * is a performance-critical path.) As in the unlink code, we want to be
894 : : * sure the message is sent before we start changing things on-disk.
895 : : */
1158 896 : 627 : CacheInvalidateSmgr(reln->smgr_rlocator);
897 : :
898 : : /* Do the truncation */
2174 fujii@postgresql.org 899 [ + + ]: 1534 : for (i = 0; i < nforks; i++)
900 : : {
901 : : /* Make sure the cached size is invalid if we encounter an error. */
1863 tmunro@postgresql.or 902 : 907 : reln->smgr_cached_nblocks[forknum[i]] = InvalidBlockNumber;
903 : :
260 904 : 907 : smgrsw[reln->smgr_which].smgr_truncate(reln, forknum[i],
905 : 907 : old_nblocks[i], nblocks[i]);
906 : :
907 : : /*
908 : : * We might as well update the local smgr_cached_nblocks values. The
909 : : * smgr cache inval message that this function sent will cause other
910 : : * backends to invalidate their copies of smgr_cached_nblocks, and
911 : : * these ones too at the next command boundary. But ensure they aren't
912 : : * outright wrong until then.
913 : : */
1863 914 : 907 : reln->smgr_cached_nblocks[forknum[i]] = nblocks[i];
915 : : }
10510 vadim4o@yahoo.com 916 : 627 : }
917 : :
918 : : /*
919 : : * smgrregistersync() -- Request a relation to be sync'd at next checkpoint
920 : : *
921 : : * This can be used after calling smgrwrite() or smgrextend() with skipFsync =
922 : : * true, to register the fsyncs that were skipped earlier.
923 : : *
924 : : * Note: be mindful that a checkpoint could already have happened between the
925 : : * smgrwrite or smgrextend calls and this! In that case, the checkpoint
926 : : * already missed fsyncing this relation, and you should use smgrimmedsync
927 : : * instead. Most callers should use the bulk loading facility in bulk_write.c
928 : : * which handles all that.
    : : *
    : : * The work is delegated to the smgr_registersync callback selected by
    : : * reln->smgr_which, for the single fork given; interrupts are held while
    : : * that callback runs.
929 : : */
930 : : void
561 heikki.linnakangas@i 931 : 24653 : smgrregistersync(SMgrRelation reln, ForkNumber forknum)
932 : : {
170 andres@anarazel.de 933 : 24653 : HOLD_INTERRUPTS();
561 heikki.linnakangas@i 934 : 24653 : smgrsw[reln->smgr_which].smgr_registersync(reln, forknum);
170 andres@anarazel.de 935 [ - + ]: 24653 : RESUME_INTERRUPTS();
561 heikki.linnakangas@i 936 : 24653 : }
937 : :
938 : : /*
939 : : * smgrimmedsync() -- Force the specified relation to stable storage.
940 : : *
941 : : * Synchronously force all previous writes to the specified relation
942 : : * down to disk.
943 : : *
944 : : * This is useful for building completely new relations (eg, new
945 : : * indexes). Instead of incrementally WAL-logging the index build
946 : : * steps, we can just write completed index pages to disk with smgrwrite
947 : : * or smgrextend, and then fsync the completed index file before
948 : : * committing the transaction. (This is sufficient for purposes of
949 : : * crash recovery, since it effectively duplicates forcing a checkpoint
950 : : * for the completed index. But it is *not* sufficient if one wishes
951 : : * to use the WAL log for PITR or replication purposes: in that case
952 : : * we have to make WAL entries as well.)
953 : : *
954 : : * The preceding writes should specify skipFsync = true to avoid
955 : : * duplicative fsyncs.
956 : : *
957 : : * Note that you need to do FlushRelationBuffers() first if there is
958 : : * any possibility that there are dirty buffers for the relation;
959 : : * otherwise the sync is not very meaningful.
960 : : *
961 : : * Most callers should use the bulk loading facility in bulk_write.c
962 : : * instead of calling this directly.
    : : *
    : : * The work is delegated to the smgr_immedsync callback selected by
    : : * reln->smgr_which, for the single fork given; interrupts are held while
    : : * that callback runs.
963 : : */
964 : : void
6235 heikki.linnakangas@i 965 :GBC 6 : smgrimmedsync(SMgrRelation reln, ForkNumber forknum)
966 : : {
170 andres@anarazel.de 967 : 6 : HOLD_INTERRUPTS();
2921 peter_e@gmx.net 968 : 6 : smgrsw[reln->smgr_which].smgr_immedsync(reln, forknum);
170 andres@anarazel.de 969 [ - + ]: 6 : RESUME_INTERRUPTS();
7766 tgl@sss.pgh.pa.us 970 : 6 : }
971 : :
972 : : /*
973 : : * Return fd for the specified block number and update *off to the appropriate
974 : : * position.
975 : : *
976 : : * This is only to be used for when AIO needs to perform the IO in a different
977 : : * process than where it was issued (e.g. in an IO worker).
    : : *
    : : * Both the returned fd and the value written to *off come from the smgr_fd
    : : * callback selected by reln->smgr_which. Unlike the other dispatch routines
    : : * in this file, this one does not hold interrupts itself; it only asserts
    : : * that they cannot currently be processed.
978 : : */
979 : : static int
161 andres@anarazel.de 980 :CBC 448402 : smgrfd(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, uint32 *off)
981 : : {
982 : : int fd;
983 : :
984 : : /*
985 : : * The caller needs to prevent interrupts from being processed, otherwise
986 : : * the FD could be closed prematurely.
987 : : */
988 [ - + - - : 448402 : Assert(!INTERRUPTS_CAN_BE_PROCESSED());
- - ]
989 : :
990 : 448402 : fd = smgrsw[reln->smgr_which].smgr_fd(reln, forknum, blocknum, off);
991 : :
992 : 448402 : return fd;
993 : : }
994 : :
995 : : /*
996 : : * AtEOXact_SMgr
997 : : *
998 : : * This routine is called during transaction commit or abort (it doesn't
999 : : * particularly care which). All unpinned SMgrRelation objects are destroyed.
1000 : : *
1001 : : * We do this as a compromise between wanting transient SMgrRelations to
1002 : : * live awhile (to amortize the costs of blind writes of multiple blocks)
1003 : : * and needing them to not live forever (since we're probably holding open
1004 : : * a kernel file descriptor for the underlying file, and we need to ensure
1005 : : * that gets closed reasonably soon if the file gets deleted).
     : : *
     : : * The whole job is done by a single call to smgrdestroyall().
1006 : : */
1007 : : void
4707 tgl@sss.pgh.pa.us 1008 : 318614 : AtEOXact_SMgr(void)
1009 : : {
584 heikki.linnakangas@i 1010 : 318614 : smgrdestroyall();
4707 tgl@sss.pgh.pa.us 1011 : 318614 : }
1012 : :
1013 : : /*
1014 : : * This routine is called when we are ordered to release all open files by a
1015 : : * ProcSignalBarrier.
     : : *
     : : * It calls smgrreleaseall() and then unconditionally returns true.
1016 : : */
1017 : : bool
1302 tmunro@postgresql.or 1018 : 597 : ProcessBarrierSmgrRelease(void)
1019 : : {
1218 1020 : 597 : smgrreleaseall();
1302 1021 : 597 : return true;
1022 : : }
1023 : :
1024 : : /*
1025 : : * Set target of the IO handle to be smgr and initialize all the relevant
1026 : : * pieces of data.
     : : *
     : : * The handle's target is set to PGAIO_TID_SMGR, and the relation locator,
     : : * fork number, starting block, block count, temp-relation flag and
     : : * skip_fsync flag are recorded in the handle's target data.
1027 : : */
1028 : : void
161 andres@anarazel.de 1029 : 1245712 : pgaio_io_set_target_smgr(PgAioHandle *ioh,
1030 : : SMgrRelationData *smgr,
1031 : : ForkNumber forknum,
1032 : : BlockNumber blocknum,
1033 : : int nblocks,
1034 : : bool skip_fsync)
1035 : : {
1036 : 1245712 : PgAioTargetData *sd = pgaio_io_get_target_data(ioh);
1037 : :
1038 : 1245712 : pgaio_io_set_target(ioh, PGAIO_TID_SMGR);
1039 : :
1040 : : /* backend is implied via IO owner */
1041 : 1245712 : sd->smgr.rlocator = smgr->smgr_rlocator.locator;
1042 : 1245712 : sd->smgr.forkNum = forknum;
1043 : 1245712 : sd->smgr.blockNum = blocknum;
1044 : 1245712 : sd->smgr.nblocks = nblocks;
1045 : 1245712 : sd->smgr.is_temp = SmgrIsTemp(smgr);
1046 : : /* Temp relations should never be fsync'd
     : : *
     : : * NOTE(review): as written, whenever SmgrIsTemp(smgr) is true the
     : : * expression below stores skip_fsync = false, i.e. "do not skip the
     : : * fsync", regardless of the caller's argument. That looks at odds
     : : * with the statement above -- confirm the intended polarity against
     : : * the code that consumes sd->smgr.skip_fsync.
     : : */
1047 [ - + - - ]: 1245712 : sd->smgr.skip_fsync = skip_fsync && !SmgrIsTemp(smgr);
1048 : 1245712 : }
1049 : :
1050 : : /*
1051 : : * Callback for the smgr AIO target, to reopen the file (e.g. because the IO
1052 : : * is executed in a worker).
     : : *
     : : * The relation is looked up with smgropen() from the locator stored in the
     : : * handle's target data, and the fd returned by smgrfd() for the stored fork
     : : * and starting block is written into the handle's op data (read.fd or
     : : * write.fd, depending on the handle's operation). The offset reported by
     : : * smgrfd() is only checked, via Assert, against the offset already recorded
     : : * in the op data; it is not stored.
1053 : : */
1054 : : static void
1055 : 448402 : smgr_aio_reopen(PgAioHandle *ioh)
1056 : : {
1057 : 448402 : PgAioTargetData *sd = pgaio_io_get_target_data(ioh);
1058 : 448402 : PgAioOpData *od = pgaio_io_get_op_data(ioh);
1059 : : SMgrRelation reln;
1060 : : ProcNumber procno;
1061 : : uint32 off;
1062 : :
1063 : : /*
1064 : : * The caller needs to prevent interrupts from being processed, otherwise
1065 : : * the FD could be closed again before we get to executing the IO.
1066 : : */
1067 [ - + - - : 448402 : Assert(!INTERRUPTS_CAN_BE_PROCESSED());
- - ]
1068 : :
1069 [ - + ]: 448402 : if (sd->smgr.is_temp)
161 andres@anarazel.de 1070 :UBC 0 : procno = pgaio_io_get_owner(ioh);
1071 : : else
161 andres@anarazel.de 1072 :CBC 448402 : procno = INVALID_PROC_NUMBER;
1073 : :
1074 : 448402 : reln = smgropen(sd->smgr.rlocator, procno);
1075 [ - + - - ]: 448402 : switch (pgaio_io_get_op(ioh))
1076 : : {
161 andres@anarazel.de 1077 :UBC 0 : case PGAIO_OP_INVALID:
1078 : 0 : pg_unreachable();
1079 : : break;
161 andres@anarazel.de 1080 :CBC 448402 : case PGAIO_OP_READV:
1081 : 448402 : od->read.fd = smgrfd(reln, sd->smgr.forkNum, sd->smgr.blockNum, &off);
1082 [ - + ]: 448402 : Assert(off == od->read.offset);
1083 : 448402 : break;
161 andres@anarazel.de 1084 :UBC 0 : case PGAIO_OP_WRITEV:
1085 : 0 : od->write.fd = smgrfd(reln, sd->smgr.forkNum, sd->smgr.blockNum, &off);
1086 [ # # ]: 0 : Assert(off == od->write.offset);
1087 : 0 : break;
1088 : : }
161 andres@anarazel.de 1089 :CBC 448402 : }
1090 : :
1091 : : /*
1092 : : * Callback for the smgr AIO target, describing the target of the IO.
     : : *
     : : * Returns the string produced by psprintf(). Its wording depends on
     : : * sd->smgr.nblocks: 0 names only the file, 1 names a single block in the
     : : * file, and anything else names the block range blockNum through
     : : * blockNum + nblocks - 1 in the file.
     : : *
     : : * NOTE(review): for temp relations the path is built with MyProcNumber,
     : : * whereas smgr_aio_reopen() uses the IO owner's proc number -- confirm
     : : * that these are guaranteed to agree wherever this callback runs.
1093 : : */
1094 : : static char *
161 andres@anarazel.de 1095 :UBC 0 : smgr_aio_describe_identity(const PgAioTargetData *sd)
1096 : : {
1097 : : RelPathStr path;
1098 : : char *desc;
1099 : :
1100 [ # # ]: 0 : path = relpathbackend(sd->smgr.rlocator,
1101 : : sd->smgr.is_temp ?
1102 : : MyProcNumber : INVALID_PROC_NUMBER,
1103 : : sd->smgr.forkNum);
1104 : :
1105 [ # # ]: 0 : if (sd->smgr.nblocks == 0)
1106 : 0 : desc = psprintf(_("file \"%s\""), path.str);
1107 [ # # ]: 0 : else if (sd->smgr.nblocks == 1)
1108 : 0 : desc = psprintf(_("block %u in file \"%s\""),
1109 : 0 : sd->smgr.blockNum,
1110 : : path.str);
1111 : : else
1112 : 0 : desc = psprintf(_("blocks %u..%u in file \"%s\""),
1113 : 0 : sd->smgr.blockNum,
1114 : 0 : sd->smgr.blockNum + sd->smgr.nblocks - 1,
1115 : : path.str);
1116 : :
1117 : 0 : return desc;
1118 : : }
|