/*-------------------------------------------------------------------------
 *
 * dsa.c
 *	  Dynamic shared memory areas.
 *
 * This module provides dynamic shared memory areas which are built on top of
 * DSM segments. While dsm.c allows segments of shared memory to be created
 * and shared between backends, it isn't designed to deal with small objects.
 * A DSA area is a shared memory heap usually backed by one or more DSM
 * segments which can allocate memory using dsa_allocate() and dsa_free().
 * Alternatively, it can be created in pre-existing shared memory, including a
 * DSM segment, and then create extra DSM segments as required. Unlike the
 * regular system heap, it deals in pseudo-pointers which must be converted to
 * backend-local pointers before they are dereferenced. These pseudo-pointers
 * can however be shared with other backends, and can be used to construct
 * shared data structures.
 *
 * Each DSA area manages a set of DSM segments, adding new segments as
 * required and detaching them when they are no longer needed. Each segment
 * contains a number of 4KB pages, a free page manager for tracking
 * consecutive runs of free pages, and a page map for tracking the source of
 * objects allocated on each page. Allocation requests above 8KB are handled
 * by choosing a segment and finding consecutive free pages in its free page
 * manager. Allocation requests for smaller sizes are handled using pools of
 * objects of a selection of sizes. Each pool consists of a number of 16 page
 * (64KB) superblocks allocated in the same way as large objects. Allocation
 * of large objects and new superblocks is serialized by a single LWLock, but
 * allocation of small objects from pre-existing superblocks uses one LWLock
 * per pool. Currently there is one pool, and therefore one lock, per size
 * class. Per-core pools to increase concurrency and strategies for reducing
 * the resulting fragmentation are areas for future research. Each superblock
 * is managed with a 'span', which tracks the superblock's freelist. Free
 * requests are handled by looking in the page map to find which span an
 * address was allocated from, so that small objects can be returned to the
 * appropriate free list, and large object pages can be returned directly to
 * the free page map. When allocating, simple heuristics for selecting
 * segments and superblocks try to encourage occupied memory to be
 * concentrated, increasing the likelihood that whole superblocks can become
 * empty and be returned to the free page manager, and whole segments can
 * become empty and be returned to the operating system.
 *
 * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 * IDENTIFICATION
 *	  src/backend/utils/mmgr/dsa.c
 *
 *-------------------------------------------------------------------------
 */
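
/*
 * Illustrative sketch of typical usage (not part of this module; see
 * utils/dsa.h for the authoritative declarations, and note that the tranche
 * id is assumed to have been registered by the caller):
 *
 *    dsa_area   *area = dsa_create(tranche_id);
 *    dsa_pointer dp = dsa_allocate(area, sizeof(int));
 *    int        *p = (int *) dsa_get_address(area, dp);
 *
 *    *p = 42;                  -- use like ordinary backend-local memory
 *    dsa_free(area, dp);       -- dp, not p, identifies the allocation
 *    dsa_detach(area);
 */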

#include "postgres.h"

#include "port/atomics.h"
#include "port/pg_bitutils.h"
#include "storage/dsm.h"
#include "storage/lwlock.h"
#include "utils/dsa.h"
#include "utils/freepage.h"
#include "utils/memutils.h"
#include "utils/resowner.h"

/*
 * How many segments to create before we double the segment size. If this is
 * low, then there is likely to be a lot of wasted space in the largest
 * segment. If it is high, then we risk running out of segment slots (see
 * dsm.c's limits on total number of segments), or limiting the total size
 * an area can manage when using small pointers.
 */
#define DSA_NUM_SEGMENTS_AT_EACH_SIZE 2

/*
 * The maximum number of DSM segments that an area can own, determined by
 * the number of bits remaining (but capped at 1024).
 */
#define DSA_MAX_SEGMENTS \
	Min(1024, (1 << ((SIZEOF_DSA_POINTER * 8) - DSA_OFFSET_WIDTH)))

/* The bitmask for extracting the offset from a dsa_pointer. */
#define DSA_OFFSET_BITMASK (((dsa_pointer) 1 << DSA_OFFSET_WIDTH) - 1)

/* Number of pages (see FPM_PAGE_SIZE) per regular superblock. */
#define DSA_PAGES_PER_SUPERBLOCK 16

/*
 * A magic number used as a sanity check for following DSM segments belonging
 * to a DSA area (this number will be XORed with the area handle and
 * the segment index).
 */
#define DSA_SEGMENT_HEADER_MAGIC 0x0ce26608

/* Build a dsa_pointer given a segment number and offset. */
#define DSA_MAKE_POINTER(segment_number, offset) \
	(((dsa_pointer) (segment_number) << DSA_OFFSET_WIDTH) | (offset))

/* Extract the segment number from a dsa_pointer. */
#define DSA_EXTRACT_SEGMENT_NUMBER(dp) ((dp) >> DSA_OFFSET_WIDTH)

/* Extract the offset from a dsa_pointer. */
#define DSA_EXTRACT_OFFSET(dp) ((dp) & DSA_OFFSET_BITMASK)
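
/*
 * For illustration (assuming 8-byte dsa_pointers, for which dsa.h defines
 * DSA_OFFSET_WIDTH as 40): DSA_MAKE_POINTER(3, 0x1000) packs segment number
 * 3 into the high bits and offset 0x1000 into the low 40 bits;
 * DSA_EXTRACT_SEGMENT_NUMBER() then recovers 3 and DSA_EXTRACT_OFFSET()
 * recovers 0x1000.
 */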

/* The type used for segment indexes (zero based). */
typedef size_t dsa_segment_index;

/* Sentinel value for dsa_segment_index indicating 'none' or 'end'. */
#define DSA_SEGMENT_INDEX_NONE (~(dsa_segment_index)0)

/*
 * How many bins of segments do we have? The bins are used to categorize
 * segments by their largest contiguous run of free pages.
 */
#define DSA_NUM_SEGMENT_BINS 16

/*
 * What is the lowest bin that holds segments that *might* have n contiguous
 * free pages? There is no point in looking in segments in lower bins; they
 * definitely can't service a request for n free pages.
 */
static inline size_t
contiguous_pages_to_segment_bin(size_t n)
{
    size_t      bin;

    if (n == 0)
        bin = 0;
    else
        bin = pg_leftmost_one_pos_size_t(n) + 1;

    return Min(bin, DSA_NUM_SEGMENT_BINS - 1);
}
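
/*
 * Worked example: a request for 5 contiguous pages maps to bin
 * pg_leftmost_one_pos_size_t(5) + 1 = 3. Bin 3 holds segments whose largest
 * free run is 4-7 pages, which might satisfy the request; bins below 3 hold
 * only segments with runs of at most 3 pages, so they can be skipped.
 */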

/* Macros for access to locks. */
#define DSA_AREA_LOCK(area) (&area->control->lock)
#define DSA_SCLASS_LOCK(area, sclass) (&area->control->pools[sclass].lock)

/*
 * The header for an individual segment. This lives at the start of each DSM
 * segment owned by a DSA area including the first segment (where it appears
 * as part of the dsa_area_control struct).
 */
typedef struct
{
    /* Sanity check magic value. */
    uint32      magic;
    /* Total number of pages in this segment (excluding metadata area). */
    size_t      usable_pages;
    /* Total size of this segment in bytes. */
    size_t      size;

    /*
     * Index of the segment that precedes this one in the same segment bin, or
     * DSA_SEGMENT_INDEX_NONE if this is the first one.
     */
    dsa_segment_index prev;

    /*
     * Index of the segment that follows this one in the same segment bin, or
     * DSA_SEGMENT_INDEX_NONE if this is the last one.
     */
    dsa_segment_index next;
    /* The index of the bin that contains this segment. */
    size_t      bin;

    /*
     * A flag raised to indicate that this segment is being returned to the
     * operating system and has been unpinned.
     */
    bool        freed;
} dsa_segment_header;

/*
 * Metadata for one superblock.
 *
 * For most blocks, span objects are stored out-of-line; that is, the span
 * object is not stored within the block itself. But, as an exception, for a
 * "span of spans", the span object is stored "inline". The allocation is
 * always exactly one page, and the dsa_area_span object is located at
 * the beginning of that page. The size class is DSA_SCLASS_BLOCK_OF_SPANS,
 * and the remaining fields are used just as they would be in an ordinary
 * block. We can't allocate spans out of ordinary superblocks because
 * creating an ordinary superblock requires us to be able to allocate a span
 * *first*. Doing it this way avoids that circularity.
 */
typedef struct
{
    dsa_pointer pool;           /* Containing pool. */
    dsa_pointer prevspan;       /* Previous span. */
    dsa_pointer nextspan;       /* Next span. */
    dsa_pointer start;          /* Starting address. */
    size_t      npages;         /* Length of span in pages. */
    uint16      size_class;     /* Size class. */
    uint16      ninitialized;   /* Maximum number of objects ever allocated. */
    uint16      nallocatable;   /* Number of objects currently allocatable. */
    uint16      firstfree;      /* First object on free list. */
    uint16      nmax;           /* Maximum number of objects ever possible. */
    uint16      fclass;         /* Current fullness class. */
} dsa_area_span;

/*
 * Given a pointer to an object in a span, access the index of the next free
 * object in the same span (ie in the span's freelist) as an L-value.
 */
#define NextFreeObjectIndex(object) (* (uint16 *) (object))

/*
 * Small allocations are handled by dividing a single block of memory into
 * many small objects of equal size. The possible allocation sizes are
 * defined by the following array. Larger size classes are spaced more widely
 * than smaller size classes. We fudge the spacing for size classes >1kB to
 * avoid space wastage: based on the knowledge that we plan to allocate 64kB
 * blocks, we bump the maximum object size up to the largest multiple of
 * 8 bytes that still lets us fit the same number of objects into one block.
 *
 * NB: Because of this fudging, if we were ever to use differently-sized blocks
 * for small allocations, these size classes would need to be reworked to be
 * optimal for the new size.
 *
 * NB: The optimal spacing for size classes, as well as the size of the blocks
 * out of which small objects are allocated, is not a question that has one
 * right answer. Some allocators (such as tcmalloc) use more closely-spaced
 * size classes than we do here, while others (like aset.c) use more
 * widely-spaced classes. Spacing the classes more closely avoids wasting
 * memory within individual chunks, but also means a larger number of
 * potentially-unfilled blocks.
 */
static const uint16 dsa_size_classes[] = {
    sizeof(dsa_area_span), 0,       /* special size classes */
    8, 16, 24, 32, 40, 48, 56, 64,  /* 8 classes separated by 8 bytes */
    80, 96, 112, 128,               /* 4 classes separated by 16 bytes */
    160, 192, 224, 256,             /* 4 classes separated by 32 bytes */
    320, 384, 448, 512,             /* 4 classes separated by 64 bytes */
    640, 768, 896, 1024,            /* 4 classes separated by 128 bytes */
    1280, 1560, 1816, 2048,         /* 4 classes separated by ~256 bytes */
    2616, 3120, 3640, 4096,         /* 4 classes separated by ~512 bytes */
    5456, 6552, 7280, 8192          /* 4 classes separated by ~1024 bytes */
};
#define DSA_NUM_SIZE_CLASSES lengthof(dsa_size_classes)

/* Special size classes. */
#define DSA_SCLASS_BLOCK_OF_SPANS 0
#define DSA_SCLASS_SPAN_LARGE 1

/*
 * The following lookup table is used to map the size of small objects
 * (less than 1kB) onto the corresponding size class. To use this table,
 * round the size of the object up to the next multiple of 8 bytes, and then
 * index into this array.
 */
static const uint8 dsa_size_class_map[] = {
    2, 3, 4, 5, 6, 7, 8, 9, 10, 10, 11, 11, 12, 12, 13, 13,
    14, 14, 14, 14, 15, 15, 15, 15, 16, 16, 16, 16, 17, 17, 17, 17,
    18, 18, 18, 18, 18, 18, 18, 18, 19, 19, 19, 19, 19, 19, 19, 19,
    20, 20, 20, 20, 20, 20, 20, 20, 21, 21, 21, 21, 21, 21, 21, 21,
    22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22,
    23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23,
    24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
    25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25
};
#define DSA_SIZE_CLASS_MAP_QUANTUM 8
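
/*
 * Worked example: a request for 100 bytes rounds up to 104, giving map index
 * 104 / 8 - 1 = 12; dsa_size_class_map[12] is 12, and dsa_size_classes[12]
 * is 112, so the request is served from the 112-byte size class.
 */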

/*
 * Superblocks are binned by how full they are. Generally, each fullness
 * class corresponds to one quartile, but the block being used for
 * allocations is always at the head of the list for fullness class 1,
 * regardless of how full it really is.
 */
#define DSA_FULLNESS_CLASSES 4

/*
 * A dsa_area_pool represents a set of objects of a given size class.
 *
 * Perhaps there should be multiple pools for the same size class for
 * contention avoidance, but for now there is just one!
 */
typedef struct
{
    /* A lock protecting access to this pool. */
    LWLock      lock;
    /* A set of linked lists of spans, arranged by fullness. */
    dsa_pointer spans[DSA_FULLNESS_CLASSES];
    /* Should we pad this out to a cacheline boundary? */
} dsa_area_pool;

/*
 * The control block for an area. This lives in shared memory, at the start of
 * the first DSM segment controlled by this area.
 */
typedef struct
{
    /* The segment header for the first segment. */
    dsa_segment_header segment_header;
    /* The handle for this area. */
    dsa_handle  handle;
    /* The handles of the segments owned by this area. */
    dsm_handle  segment_handles[DSA_MAX_SEGMENTS];
    /* Lists of segments, binned by maximum contiguous run of free pages. */
    dsa_segment_index segment_bins[DSA_NUM_SEGMENT_BINS];
    /* The object pools for each size class. */
    dsa_area_pool pools[DSA_NUM_SIZE_CLASSES];
    /* initial allocation segment size */
    size_t      init_segment_size;
    /* maximum allocation segment size */
    size_t      max_segment_size;
    /* The total size of all active segments. */
    size_t      total_segment_size;
    /* The maximum total size of backing storage we are allowed. */
    size_t      max_total_segment_size;
    /* Highest used segment index in the history of this area. */
    dsa_segment_index high_segment_index;
    /* The reference count for this area. */
    int         refcnt;
    /* A flag indicating that this area has been pinned. */
    bool        pinned;
    /* The number of times that segments have been freed. */
    size_t      freed_segment_counter;
    /* The LWLock tranche ID. */
    int         lwlock_tranche_id;
    /* The general lock (protects everything except object pools). */
    LWLock      lock;
} dsa_area_control;

/* Given a pointer to a pool, find a dsa_pointer. */
#define DsaAreaPoolToDsaPointer(area, p) \
    DSA_MAKE_POINTER(0, (char *) p - (char *) area->control)

/*
 * A dsa_segment_map is stored within the backend-private memory of each
 * individual backend. It holds the base address of the segment within that
 * backend, plus the addresses of key objects within the segment. Those
 * could instead be derived from the base address but it's handy to have them
 * around.
 */
typedef struct
{
    dsm_segment *segment;       /* DSM segment */
    char       *mapped_address; /* Address at which segment is mapped */
    dsa_segment_header *header; /* Header (same as mapped_address) */
    FreePageManager *fpm;       /* Free page manager within segment. */
    dsa_pointer *pagemap;       /* Page map within segment. */
} dsa_segment_map;

/*
 * Per-backend state for a storage area. Backends obtain one of these by
 * creating an area or attaching to an existing one using a handle. Each
 * process that needs to use an area uses its own object to track where the
 * segments are mapped.
 */
struct dsa_area
{
    /* Pointer to the control object in shared memory. */
    dsa_area_control *control;

    /*
     * All the mappings are owned by this. The dsa_area itself is not
     * directly tracked by the ResourceOwner, but the effect is the same.
     * NULL if the attachment has session lifespan, i.e. if dsa_pin_mapping()
     * has been called.
     */
    ResourceOwner resowner;

    /*
     * This backend's array of segment maps, ordered by segment index
     * corresponding to control->segment_handles. Some of the area's segments
     * may not be mapped in this backend yet, and some slots may have been
     * freed and need to be detached; these operations happen on demand.
     */
    dsa_segment_map segment_maps[DSA_MAX_SEGMENTS];

    /* The highest segment index this backend has ever mapped. */
    dsa_segment_index high_segment_index;

    /* The last observed freed_segment_counter. */
    size_t      freed_segment_counter;
};

#define DSA_SPAN_NOTHING_FREE ((uint16) -1)
#define DSA_SUPERBLOCK_SIZE (DSA_PAGES_PER_SUPERBLOCK * FPM_PAGE_SIZE)

/* Given a pointer to a segment_map, obtain a segment index number. */
#define get_segment_index(area, segment_map_ptr) \
    (segment_map_ptr - &area->segment_maps[0])

static void init_span(dsa_area *area, dsa_pointer span_pointer,
                      dsa_area_pool *pool, dsa_pointer start, size_t npages,
                      uint16 size_class);
static bool transfer_first_span(dsa_area *area, dsa_area_pool *pool,
                                int fromclass, int toclass);
static inline dsa_pointer alloc_object(dsa_area *area, int size_class);
static bool ensure_active_superblock(dsa_area *area, dsa_area_pool *pool,
                                     int size_class);
static dsa_segment_map *get_segment_by_index(dsa_area *area,
                                             dsa_segment_index index);
static void destroy_superblock(dsa_area *area, dsa_pointer span_pointer);
static void unlink_span(dsa_area *area, dsa_area_span *span);
static void add_span_to_fullness_class(dsa_area *area, dsa_area_span *span,
                                       dsa_pointer span_pointer, int fclass);
static void unlink_segment(dsa_area *area, dsa_segment_map *segment_map);
static dsa_segment_map *get_best_segment(dsa_area *area, size_t npages);
static dsa_segment_map *make_new_segment(dsa_area *area, size_t requested_pages);
static dsa_area *create_internal(void *place, size_t size,
                                 int tranche_id,
                                 dsm_handle control_handle,
                                 dsm_segment *control_segment,
                                 size_t init_segment_size,
                                 size_t max_segment_size);
static dsa_area *attach_internal(void *place, dsm_segment *segment,
                                 dsa_handle handle);
static void check_for_freed_segments(dsa_area *area);
static void check_for_freed_segments_locked(dsa_area *area);
static void rebin_segment(dsa_area *area, dsa_segment_map *segment_map);

/*
 * Create a new shared area in a new DSM segment. Further DSM segments will
 * be allocated as required to extend the available space.
 *
 * We can't allocate a LWLock tranche_id within this function, because tranche
 * IDs are a scarce resource; there are only 64k available, using low numbers
 * when possible matters, and we have no provision for recycling them. So,
 * we require the caller to provide one.
 */
dsa_area *
dsa_create_ext(int tranche_id, size_t init_segment_size, size_t max_segment_size)
{
    dsm_segment *segment;
    dsa_area   *area;

    /*
     * Create the DSM segment that will hold the shared control object and the
     * first segment of usable space.
     */
    segment = dsm_create(init_segment_size, 0);

    /*
     * All segments backing this area are pinned, so that DSA can explicitly
     * control their lifetime (otherwise a newly created segment belonging to
     * this area might be freed when the only backend that happens to have it
     * mapped in ends, corrupting the area).
     */
    dsm_pin_segment(segment);

    /* Create a new DSA area with the control object in this segment. */
    area = create_internal(dsm_segment_address(segment),
                           init_segment_size,
                           tranche_id,
                           dsm_segment_handle(segment), segment,
                           init_segment_size, max_segment_size);

    /* Clean up when the control segment detaches. */
    on_dsm_detach(segment, &dsa_on_dsm_detach_release_in_place,
                  PointerGetDatum(dsm_segment_address(segment)));

    return area;
}
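
/*
 * Illustrative sketch (not part of this file): most callers go through the
 * dsa_create() macro, which supplies the default segment sizes, e.g.
 *
 *    area = dsa_create_ext(tranche_id,
 *                          DSA_DEFAULT_INIT_SEGMENT_SIZE,
 *                          DSA_MAX_SEGMENT_SIZE);
 *
 * The handle from dsa_get_handle(area) can then be published so that other
 * backends can dsa_attach() to the same area.
 */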

/*
 * Create a new shared area in an existing shared memory space, which may be
 * either DSM or Postmaster-initialized memory. DSM segments will be
 * allocated as required to extend the available space, though that can be
 * prevented with dsa_set_size_limit(area, size) using the same size provided
 * to dsa_create_in_place.
 *
 * Areas created in-place must eventually be released by the backend that
 * created them and all backends that attach to them. This can be done
 * explicitly with dsa_release_in_place, or, in the special case that 'place'
 * happens to be in a pre-existing DSM segment, by passing in a pointer to the
 * segment so that a detach hook can be registered with the containing DSM
 * segment.
 *
 * See dsa_create() for a note about the tranche arguments.
 */
dsa_area *
dsa_create_in_place_ext(void *place, size_t size,
                        int tranche_id, dsm_segment *segment,
                        size_t init_segment_size, size_t max_segment_size)
{
    dsa_area   *area;

    area = create_internal(place, size, tranche_id,
                           DSM_HANDLE_INVALID, NULL,
                           init_segment_size, max_segment_size);

    /*
     * Clean up when the control segment detaches, if a containing DSM segment
     * was provided.
     */
    if (segment != NULL)
        on_dsm_detach(segment, &dsa_on_dsm_detach_release_in_place,
                      PointerGetDatum(place));

    return area;
}
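
/*
 * Illustrative sketch (assuming 'place' and 'size' describe a region of
 * postmaster-initialized shared memory): to confine the area to its original
 * region, cap it immediately after creation, as suggested above:
 *
 *    area = dsa_create_in_place(place, size, tranche_id, NULL);
 *    dsa_set_size_limit(area, size);
 */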

/*
 * Obtain a handle that can be passed to other processes so that they can
 * attach to the given area. Cannot be called for areas created with
 * dsa_create_in_place.
 */
dsa_handle
dsa_get_handle(dsa_area *area)
{
    Assert(area->control->handle != DSA_HANDLE_INVALID);
    return area->control->handle;
}

/*
 * Attach to an area given a handle generated (possibly in another process) by
 * dsa_get_handle. The area must have been created with dsa_create (not
 * dsa_create_in_place).
 */
dsa_area *
dsa_attach(dsa_handle handle)
{
    dsm_segment *segment;
    dsa_area   *area;

    /*
     * An area handle is really a DSM segment handle for the first segment, so
     * we go ahead and attach to that.
     */
    segment = dsm_attach(handle);
    if (segment == NULL)
        ereport(ERROR,
                (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
                 errmsg("could not attach to dynamic shared area")));

    area = attach_internal(dsm_segment_address(segment), segment, handle);

    /* Clean up when the control segment detaches. */
    on_dsm_detach(segment, &dsa_on_dsm_detach_release_in_place,
                  PointerGetDatum(dsm_segment_address(segment)));

    return area;
}

/*
 * Returns whether the area with the given handle was already attached by the
 * current process. The area must have been created with dsa_create (not
 * dsa_create_in_place).
 */
bool
dsa_is_attached(dsa_handle handle)
{
    /*
     * An area handle is really a DSM segment handle for the first segment, so
     * we can just search for that.
     */
    return dsm_find_mapping(handle) != NULL;
}

/*
 * Attach to an area that was created with dsa_create_in_place. The caller
 * must somehow know the location in memory that was used when the area was
 * created, though it may be mapped at a different virtual address in this
 * process.
 *
 * See dsa_create_in_place for note about releasing in-place areas, and the
 * optional 'segment' argument which can be provided to allow automatic
 * release if the containing memory happens to be a DSM segment.
 */
dsa_area *
dsa_attach_in_place(void *place, dsm_segment *segment)
{
    dsa_area   *area;

    area = attach_internal(place, NULL, DSA_HANDLE_INVALID);

    /*
     * Clean up when the control segment detaches, if a containing DSM segment
     * was provided.
     */
    if (segment != NULL)
        on_dsm_detach(segment, &dsa_on_dsm_detach_release_in_place,
                      PointerGetDatum(place));

    return area;
}

/*
 * Release a DSA area that was produced by dsa_create_in_place or
 * dsa_attach_in_place. The 'segment' argument is ignored but provides an
 * interface suitable for on_dsm_detach, for the convenience of users who want
 * to create a DSA segment inside an existing DSM segment and have it
 * automatically released when the containing DSM segment is detached.
 * 'place' should be the address of the place where the area was created.
 *
 * This callback is automatically registered for the DSM segment containing
 * the control object of in-place areas when a segment is provided to
 * dsa_create_in_place or dsa_attach_in_place, and also for all areas created
 * with dsa_create.
 */
void
dsa_on_dsm_detach_release_in_place(dsm_segment *segment, Datum place)
{
    dsa_release_in_place(DatumGetPointer(place));
}

/*
 * Release a DSA area that was produced by dsa_create_in_place or
 * dsa_attach_in_place. The 'code' argument is ignored but provides an
 * interface suitable for on_shmem_exit or before_shmem_exit, for the
 * convenience of users who want to create a DSA segment inside shared memory
 * other than a DSM segment and have it automatically release at backend exit.
 * 'place' should be the address of the place where the area was created.
 */
void
dsa_on_shmem_exit_release_in_place(int code, Datum place)
{
    dsa_release_in_place(DatumGetPointer(place));
}

/*
 * Release a DSA area that was produced by dsa_create_in_place or
 * dsa_attach_in_place. It is preferable to use one of the 'dsa_on_XXX'
 * callbacks so that this is managed automatically, because failure to release
 * an area created in-place leaks its segments permanently.
 *
 * This is also called automatically for areas produced by dsa_create or
 * dsa_attach as an implementation detail.
 */
void
dsa_release_in_place(void *place)
{
    dsa_area_control *control = (dsa_area_control *) place;
    int         i;

    LWLockAcquire(&control->lock, LW_EXCLUSIVE);
    Assert(control->segment_header.magic ==
           (DSA_SEGMENT_HEADER_MAGIC ^ control->handle ^ 0));
    Assert(control->refcnt > 0);
    if (--control->refcnt == 0)
    {
        for (i = 0; i <= control->high_segment_index; ++i)
        {
            dsm_handle  handle;

            handle = control->segment_handles[i];
            if (handle != DSM_HANDLE_INVALID)
                dsm_unpin_segment(handle);
        }
    }
    LWLockRelease(&control->lock);
}

/*
 * Keep a DSA area attached until end of session or explicit detach.
 *
 * By default, areas are owned by the current resource owner, which means they
 * are detached automatically when that scope ends.
 */
void
dsa_pin_mapping(dsa_area *area)
{
    int         i;

    if (area->resowner != NULL)
    {
        area->resowner = NULL;

        for (i = 0; i <= area->high_segment_index; ++i)
            if (area->segment_maps[i].segment != NULL)
                dsm_pin_mapping(area->segment_maps[i].segment);
    }
}

/*
 * Allocate memory in this storage area. The return value is a dsa_pointer
 * that can be passed to other processes, and converted to a local pointer
 * with dsa_get_address. 'flags' is a bitmap which should be constructed
 * from the following values:
 *
 * DSA_ALLOC_HUGE allows allocations >= 1GB. Otherwise, such allocations
 * will result in an ERROR.
 *
 * DSA_ALLOC_NO_OOM causes this function to return InvalidDsaPointer when
 * no memory is available or a size limit established by dsa_set_size_limit
 * would be exceeded. Otherwise, such allocations will result in an ERROR.
 *
 * DSA_ALLOC_ZERO causes the allocated memory to be zeroed. Otherwise, the
 * contents of newly-allocated memory are indeterminate.
 *
 * These flags correspond to similarly named flags used by
 * MemoryContextAllocExtended(). See also the macros dsa_allocate and
 * dsa_allocate0 which expand to a call to this function with commonly used
 * flags.
 */
dsa_pointer
dsa_allocate_extended(dsa_area *area, size_t size, int flags)
{
    uint16      size_class;
    dsa_pointer start_pointer;
    dsa_segment_map *segment_map;
    dsa_pointer result;

    Assert(size > 0);

    /* Sanity check on huge individual allocation size. */
    if (((flags & DSA_ALLOC_HUGE) != 0 && !AllocHugeSizeIsValid(size)) ||
        ((flags & DSA_ALLOC_HUGE) == 0 && !AllocSizeIsValid(size)))
        elog(ERROR, "invalid DSA memory alloc request size %zu", size);

    /*
     * If bigger than the largest size class, just grab a run of pages from
     * the free page manager, instead of allocating an object from a pool.
     * There will still be a span, but it's a special class of span that
     * manages this whole allocation and simply gives all pages back to the
     * free page manager when dsa_free is called.
     */
    if (size > dsa_size_classes[lengthof(dsa_size_classes) - 1])
    {
        size_t      npages = fpm_size_to_pages(size);
        size_t      first_page;
        dsa_pointer span_pointer;
        dsa_area_pool *pool = &area->control->pools[DSA_SCLASS_SPAN_LARGE];

        /* Obtain a span object. */
        span_pointer = alloc_object(area, DSA_SCLASS_BLOCK_OF_SPANS);
        if (!DsaPointerIsValid(span_pointer))
        {
            /* Raise error unless asked not to. */
            if ((flags & DSA_ALLOC_NO_OOM) == 0)
                ereport(ERROR,
                        (errcode(ERRCODE_OUT_OF_MEMORY),
                         errmsg("out of memory"),
                         errdetail("Failed on DSA request of size %zu.",
                                   size)));
            return InvalidDsaPointer;
        }

        LWLockAcquire(DSA_AREA_LOCK(area), LW_EXCLUSIVE);

        /* Find a segment from which to allocate. */
        segment_map = get_best_segment(area, npages);
        if (segment_map == NULL)
            segment_map = make_new_segment(area, npages);
        if (segment_map == NULL)
        {
            /* Can't make any more segments: game over. */
            LWLockRelease(DSA_AREA_LOCK(area));
            dsa_free(area, span_pointer);

            /* Raise error unless asked not to. */
            if ((flags & DSA_ALLOC_NO_OOM) == 0)
                ereport(ERROR,
                        (errcode(ERRCODE_OUT_OF_MEMORY),
                         errmsg("out of memory"),
                         errdetail("Failed on DSA request of size %zu.",
                                   size)));
            return InvalidDsaPointer;
        }

        /*
         * Ask the free page manager for a run of pages. This should always
         * succeed, since both get_best_segment and make_new_segment should
         * only return a non-NULL pointer if it actually contains enough
         * contiguous freespace. If it does fail, something in our backend
         * private state is out of whack, so use FATAL to kill the process.
         */
        if (!FreePageManagerGet(segment_map->fpm, npages, &first_page))
            elog(FATAL,
                 "dsa_allocate could not find %zu free pages", npages);
        LWLockRelease(DSA_AREA_LOCK(area));

        start_pointer = DSA_MAKE_POINTER(get_segment_index(area, segment_map),
                                         first_page * FPM_PAGE_SIZE);

        /* Initialize span and pagemap. */
        LWLockAcquire(DSA_SCLASS_LOCK(area, DSA_SCLASS_SPAN_LARGE),
                      LW_EXCLUSIVE);
        init_span(area, span_pointer, pool, start_pointer, npages,
                  DSA_SCLASS_SPAN_LARGE);
        segment_map->pagemap[first_page] = span_pointer;
        LWLockRelease(DSA_SCLASS_LOCK(area, DSA_SCLASS_SPAN_LARGE));

        /* Zero-initialize the memory if requested. */
        if ((flags & DSA_ALLOC_ZERO) != 0)
            memset(dsa_get_address(area, start_pointer), 0, size);

        return start_pointer;
    }

    /* Map allocation to a size class. */
    if (size < lengthof(dsa_size_class_map) * DSA_SIZE_CLASS_MAP_QUANTUM)
    {
        int         mapidx;

        /* For smaller sizes we have a lookup table... */
        mapidx = ((size + DSA_SIZE_CLASS_MAP_QUANTUM - 1) /
                  DSA_SIZE_CLASS_MAP_QUANTUM) - 1;
        size_class = dsa_size_class_map[mapidx];
    }
    else
    {
        uint16      min;
        uint16      max;

        /* ... and for the rest we search by binary chop. */
        min = dsa_size_class_map[lengthof(dsa_size_class_map) - 1];
        max = lengthof(dsa_size_classes) - 1;

        while (min < max)
        {
            uint16      mid = (min + max) / 2;
            uint16      class_size = dsa_size_classes[mid];

            if (class_size < size)
                min = mid + 1;
            else
                max = mid;
        }

        size_class = min;
    }
    Assert(size <= dsa_size_classes[size_class]);
    Assert(size_class == 0 || size > dsa_size_classes[size_class - 1]);

    /* Attempt to allocate an object from the appropriate pool. */
    result = alloc_object(area, size_class);

    /* Check for failure to allocate. */
    if (!DsaPointerIsValid(result))
    {
        /* Raise error unless asked not to. */
        if ((flags & DSA_ALLOC_NO_OOM) == 0)
            ereport(ERROR,
                    (errcode(ERRCODE_OUT_OF_MEMORY),
                     errmsg("out of memory"),
                     errdetail("Failed on DSA request of size %zu.", size)));
        return InvalidDsaPointer;
    }

    /* Zero-initialize the memory if requested. */
    if ((flags & DSA_ALLOC_ZERO) != 0)
        memset(dsa_get_address(area, result), 0, size);

    return result;
}
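
/*
 * Illustrative sketch (not part of this file): a caller that prefers a soft
 * failure over an ERROR can combine the flags documented above:
 *
 *    dsa_pointer dp;
 *
 *    dp = dsa_allocate_extended(area, size,
 *                               DSA_ALLOC_NO_OOM | DSA_ALLOC_ZERO);
 *    if (!DsaPointerIsValid(dp))
 *        ... recover, e.g. by evicting something and retrying ...
 */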

/*
 * Free memory obtained with dsa_allocate.
 */
void
dsa_free(dsa_area *area, dsa_pointer dp)
{
    dsa_segment_map *segment_map;
    int         pageno;
    dsa_pointer span_pointer;
    dsa_area_span *span;
    char       *superblock;
    char       *object;
    size_t      size;
    int         size_class;

    /* Make sure we don't have a stale segment in the slot 'dp' refers to. */
    check_for_freed_segments(area);

    /* Locate the object, span and pool. */
    segment_map = get_segment_by_index(area, DSA_EXTRACT_SEGMENT_NUMBER(dp));
    pageno = DSA_EXTRACT_OFFSET(dp) / FPM_PAGE_SIZE;
    span_pointer = segment_map->pagemap[pageno];
    span = dsa_get_address(area, span_pointer);
    superblock = dsa_get_address(area, span->start);
    object = dsa_get_address(area, dp);
    size_class = span->size_class;
    size = dsa_size_classes[size_class];

    /*
     * Special case for large objects that live in a special span: we return
     * those pages directly to the free page manager and free the span.
     */
    if (span->size_class == DSA_SCLASS_SPAN_LARGE)
    {

#ifdef CLOBBER_FREED_MEMORY
        memset(object, 0x7f, span->npages * FPM_PAGE_SIZE);
#endif

        /* Give pages back to free page manager. */
        LWLockAcquire(DSA_AREA_LOCK(area), LW_EXCLUSIVE);
        FreePageManagerPut(segment_map->fpm,
                           DSA_EXTRACT_OFFSET(span->start) / FPM_PAGE_SIZE,
                           span->npages);

        /* Move segment to appropriate bin if necessary. */
        rebin_segment(area, segment_map);
        LWLockRelease(DSA_AREA_LOCK(area));

        /* Unlink span. */
        LWLockAcquire(DSA_SCLASS_LOCK(area, DSA_SCLASS_SPAN_LARGE),
                      LW_EXCLUSIVE);
        unlink_span(area, span);
        LWLockRelease(DSA_SCLASS_LOCK(area, DSA_SCLASS_SPAN_LARGE));
        /* Free the span object so it can be reused. */
        dsa_free(area, span_pointer);
        return;
    }

#ifdef CLOBBER_FREED_MEMORY
    memset(object, 0x7f, size);
#endif

    LWLockAcquire(DSA_SCLASS_LOCK(area, size_class), LW_EXCLUSIVE);

    /* Put the object on the span's freelist. */
    Assert(object >= superblock);
    Assert(object < superblock + DSA_SUPERBLOCK_SIZE);
    Assert((object - superblock) % size == 0);
    NextFreeObjectIndex(object) = span->firstfree;
    span->firstfree = (object - superblock) / size;
    ++span->nallocatable;

    /*
     * See if the span needs to be moved to a different fullness class, or be
     * freed so its pages can be given back to the segment.
     */
    if (span->nallocatable == 1 && span->fclass == DSA_FULLNESS_CLASSES - 1)
    {
        /*
         * The block was completely full and is located in the
         * highest-numbered fullness class, which is never scanned for free
         * chunks. We must move it to the next-lower fullness class.
         */
        unlink_span(area, span);
        add_span_to_fullness_class(area, span, span_pointer,
                                   DSA_FULLNESS_CLASSES - 2);

        /*
         * If this is the only span, and there is no active span, then we
         * should probably move this span to fullness class 1. (Otherwise if
         * you allocate exactly all the objects in the only span, it moves to
         * class 3, then you free them all, it moves to 2, and then is given
         * back, leaving no active span).
         */
    }
    else if (span->nallocatable == span->nmax &&
             (span->fclass != 1 || span->prevspan != InvalidDsaPointer))
    {
        /*
         * This entire block is free, and it's not the active block for this
         * size class. Return the memory to the free page manager. We don't
         * do this for the active block to prevent hysteresis: if we
         * repeatedly allocate and free the only chunk in the active block, it
         * will be very inefficient if we deallocate and reallocate the block
         * every time.
         */
        destroy_superblock(area, span_pointer);
    }

    LWLockRelease(DSA_SCLASS_LOCK(area, size_class));
}

/*
 * Obtain a backend-local address for a dsa_pointer. 'dp' must point to
 * memory allocated by the given area (possibly in another process) that
 * hasn't yet been freed. This may cause a segment to be mapped into the
 * current process if required, and may cause freed segments to be unmapped.
 */
void *
dsa_get_address(dsa_area *area, dsa_pointer dp)
{
    dsa_segment_index index;
    size_t      offset;

    /* Convert InvalidDsaPointer to NULL. */
    if (!DsaPointerIsValid(dp))
        return NULL;

    /* Process any requests to detach from freed segments. */
    check_for_freed_segments(area);

    /* Break the dsa_pointer into its components. */
    index = DSA_EXTRACT_SEGMENT_NUMBER(dp);
    offset = DSA_EXTRACT_OFFSET(dp);
    Assert(index < DSA_MAX_SEGMENTS);

    /* Check if we need to cause this segment to be mapped in. */
    if (unlikely(area->segment_maps[index].mapped_address == NULL))
    {
        /* Call for effect (we don't need the result). */
        get_segment_by_index(area, index);
    }

    return area->segment_maps[index].mapped_address + offset;
}

/*
 * Pin this area, so that it will continue to exist even if all backends
 * detach from it. In that case, the area can still be reattached to if a
 * handle has been recorded somewhere.
 */
void
dsa_pin(dsa_area *area)
{
    LWLockAcquire(DSA_AREA_LOCK(area), LW_EXCLUSIVE);
    if (area->control->pinned)
    {
        LWLockRelease(DSA_AREA_LOCK(area));
        elog(ERROR, "dsa_area already pinned");
    }
    area->control->pinned = true;
    ++area->control->refcnt;
    LWLockRelease(DSA_AREA_LOCK(area));
}

/*
 * Undo the effects of dsa_pin, so that the given area can be freed when no
 * backends are attached to it. May be called only if dsa_pin has been
 * called.
 */
void
dsa_unpin(dsa_area *area)
{
    LWLockAcquire(DSA_AREA_LOCK(area), LW_EXCLUSIVE);
    Assert(area->control->refcnt > 1);
    if (!area->control->pinned)
    {
        LWLockRelease(DSA_AREA_LOCK(area));
        elog(ERROR, "dsa_area not pinned");
    }
    area->control->pinned = false;
    --area->control->refcnt;
    LWLockRelease(DSA_AREA_LOCK(area));
}

/*
 * Set the total size limit for this area. This limit is checked whenever new
 * segments need to be allocated from the operating system. If the new size
 * limit is already exceeded, this has no immediate effect.
 *
 * Note that the total virtual memory usage may be temporarily larger than
 * this limit when segments have been freed, but not yet detached by all
 * backends that have attached to them.
 */
void
dsa_set_size_limit(dsa_area *area, size_t limit)
{
    LWLockAcquire(DSA_AREA_LOCK(area), LW_EXCLUSIVE);
    area->control->max_total_segment_size = limit;
    LWLockRelease(DSA_AREA_LOCK(area));
}

/* Return the total size of all active segments */
size_t
dsa_get_total_size(dsa_area *area)
{
    size_t      size;

    LWLockAcquire(DSA_AREA_LOCK(area), LW_SHARED);
    size = area->control->total_segment_size;
    LWLockRelease(DSA_AREA_LOCK(area));

    return size;
}

/*
 * Same as dsa_get_total_size(), but accepts a DSA handle. The area must have
 * been created with dsa_create (not dsa_create_in_place).
 */
size_t
dsa_get_total_size_from_handle(dsa_handle handle)
{
    size_t      size;
    bool        already_attached;
    dsm_segment *segment;
    dsa_area_control *control;

    already_attached = dsa_is_attached(handle);
    if (already_attached)
        segment = dsm_find_mapping(handle);
    else
        segment = dsm_attach(handle);

    if (segment == NULL)
        ereport(ERROR,
                (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
                 errmsg("could not attach to dynamic shared area")));

    control = (dsa_area_control *) dsm_segment_address(segment);

    LWLockAcquire(&control->lock, LW_SHARED);
    size = control->total_segment_size;
    LWLockRelease(&control->lock);

    if (!already_attached)
        dsm_detach(segment);

    return size;
}

/*
 * Aggressively free all spare memory in the hope of returning DSM segments to
 * the operating system.
 */
void
dsa_trim(dsa_area *area)
{
    int         size_class;

    /*
     * Trim in reverse pool order so we get to the spans-of-spans last, just
     * in case any become entirely free while processing all the other pools.
     */
    for (size_class = DSA_NUM_SIZE_CLASSES - 1; size_class >= 0; --size_class)
    {
        dsa_area_pool *pool = &area->control->pools[size_class];
        dsa_pointer span_pointer;

        if (size_class == DSA_SCLASS_SPAN_LARGE)
        {
            /* Large object frees give back segments aggressively already. */
            continue;
        }

        /*
         * Search fullness class 1 only. That is where we expect to find an
         * entirely empty superblock (entirely empty superblocks in other
         * fullness classes are returned to the free page map by dsa_free).
         */
        LWLockAcquire(DSA_SCLASS_LOCK(area, size_class), LW_EXCLUSIVE);
        span_pointer = pool->spans[1];
        while (DsaPointerIsValid(span_pointer))
        {
            dsa_area_span *span = dsa_get_address(area, span_pointer);
            dsa_pointer next = span->nextspan;

            if (span->nallocatable == span->nmax)
                destroy_superblock(area, span_pointer);

            span_pointer = next;
        }
        LWLockRelease(DSA_SCLASS_LOCK(area, size_class));
    }
}

/*
 * Print out debugging information about the internal state of the shared
 * memory area.
 */
void
dsa_dump(dsa_area *area)
{
    size_t      i,
                j;

    /*
     * Note: This gives an inconsistent snapshot as it acquires and releases
     * individual locks as it goes...
     */

    LWLockAcquire(DSA_AREA_LOCK(area), LW_EXCLUSIVE);
    check_for_freed_segments_locked(area);
    fprintf(stderr, "dsa_area handle %x:\n", area->control->handle);
    fprintf(stderr, "  max_total_segment_size: %zu\n",
            area->control->max_total_segment_size);
    fprintf(stderr, "  total_segment_size: %zu\n",
            area->control->total_segment_size);
    fprintf(stderr, "  refcnt: %d\n", area->control->refcnt);
    fprintf(stderr, "  pinned: %c\n", area->control->pinned ? 't' : 'f');
    fprintf(stderr, "  segment bins:\n");
    for (i = 0; i < DSA_NUM_SEGMENT_BINS; ++i)
    {
        if (area->control->segment_bins[i] != DSA_SEGMENT_INDEX_NONE)
        {
            dsa_segment_index segment_index;

            if (i == 0)
                fprintf(stderr,
                        "    segment bin %zu (no contiguous free pages):\n", i);
            else
                fprintf(stderr,
                        "    segment bin %zu (at least %d contiguous pages free):\n",
                        i, 1 << (i - 1));
            segment_index = area->control->segment_bins[i];
            while (segment_index != DSA_SEGMENT_INDEX_NONE)
            {
                dsa_segment_map *segment_map;

                segment_map =
                    get_segment_by_index(area, segment_index);

                fprintf(stderr,
                        "      segment index %zu, usable_pages = %zu, "
                        "contiguous_pages = %zu, mapped at %p\n",
                        segment_index,
                        segment_map->header->usable_pages,
                        fpm_largest(segment_map->fpm),
                        segment_map->mapped_address);
                segment_index = segment_map->header->next;
            }
        }
    }
    LWLockRelease(DSA_AREA_LOCK(area));

    fprintf(stderr, "  pools:\n");
    for (i = 0; i < DSA_NUM_SIZE_CLASSES; ++i)
    {
        bool        found = false;

        LWLockAcquire(DSA_SCLASS_LOCK(area, i), LW_EXCLUSIVE);
        for (j = 0; j < DSA_FULLNESS_CLASSES; ++j)
            if (DsaPointerIsValid(area->control->pools[i].spans[j]))
                found = true;
        if (found)
        {
            if (i == DSA_SCLASS_BLOCK_OF_SPANS)
                fprintf(stderr, "    pool for blocks of span objects:\n");
            else if (i == DSA_SCLASS_SPAN_LARGE)
                fprintf(stderr, "    pool for large object spans:\n");
            else
                fprintf(stderr,
                        "    pool for size class %zu (object size %hu bytes):\n",
                        i, dsa_size_classes[i]);
            for (j = 0; j < DSA_FULLNESS_CLASSES; ++j)
            {
                if (!DsaPointerIsValid(area->control->pools[i].spans[j]))
                    fprintf(stderr, "      fullness class %zu is empty\n", j);
                else
                {
                    dsa_pointer span_pointer = area->control->pools[i].spans[j];

                    fprintf(stderr, "      fullness class %zu:\n", j);
                    while (DsaPointerIsValid(span_pointer))
                    {
                        dsa_area_span *span;

                        span = dsa_get_address(area, span_pointer);
                        fprintf(stderr,
                                "        span descriptor at "
                                DSA_POINTER_FORMAT ", superblock at "
                                DSA_POINTER_FORMAT
                                ", pages = %zu, objects free = %hu/%hu\n",
                                span_pointer, span->start, span->npages,
                                span->nallocatable, span->nmax);
                        span_pointer = span->nextspan;
                    }
                }
            }
        }
        LWLockRelease(DSA_SCLASS_LOCK(area, i));
    }
}

/*
 * Return the smallest size that you can successfully provide to
 * dsa_create_in_place.
 */
size_t
dsa_minimum_size(void)
{
    size_t      size;
    int         pages = 0;

    size = MAXALIGN(sizeof(dsa_area_control)) +
        MAXALIGN(sizeof(FreePageManager));

    /* Figure out how many pages we need, including the page map... */
    while (((size + FPM_PAGE_SIZE - 1) / FPM_PAGE_SIZE) > pages)
    {
        ++pages;
        size += sizeof(dsa_pointer);
    }

    return pages * FPM_PAGE_SIZE;
}
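
/*
 * Illustrative sketch (hypothetical caller): a module reserving room for an
 * in-place area while sizing shared memory might request exactly this
 * minimum and later pass the same byte count to dsa_create_in_place():
 *
 *    size = add_size(size, dsa_minimum_size());
 */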
1263 : :
1264 : : /*
1265 : : * Workhorse function for dsa_create and dsa_create_in_place.
1266 : : */
1267 : : static dsa_area *
1268 : 1619 : create_internal(void *place, size_t size,
1269 : : int tranche_id,
1270 : : dsm_handle control_handle,
1271 : : dsm_segment *control_segment,
1272 : : size_t init_segment_size, size_t max_segment_size)
1273 : : {
1274 : : dsa_area_control *control;
1275 : : dsa_area *area;
1276 : : dsa_segment_map *segment_map;
1277 : : size_t usable_pages;
1278 : : size_t total_pages;
1279 : : size_t metadata_bytes;
1280 : : int i;
1281 : :
1282 : : /* Check the initial and maximum block sizes */
629 msawada@postgresql.o 1283 [ - + ]: 1619 : Assert(init_segment_size >= DSA_MIN_SEGMENT_SIZE);
1284 [ - + ]: 1619 : Assert(max_segment_size >= init_segment_size);
1285 [ - + ]: 1619 : Assert(max_segment_size <= DSA_MAX_SEGMENT_SIZE);
1286 : :
1287 : : /* Sanity check on the space we have to work in. */
3301 rhaas@postgresql.org 1288 [ - + ]: 1619 : if (size < dsa_minimum_size())
3301 rhaas@postgresql.org 1289 [ # # ]:UBC 0 : elog(ERROR, "dsa_area space must be at least %zu, but %zu provided",
1290 : : dsa_minimum_size(), size);
1291 : :
1292 : : /* Now figure out how much space is usable */
3301 rhaas@postgresql.org 1293 :CBC 1619 : total_pages = size / FPM_PAGE_SIZE;
1294 : 1619 : metadata_bytes =
1295 : : MAXALIGN(sizeof(dsa_area_control)) +
1296 : 1619 : MAXALIGN(sizeof(FreePageManager)) +
1297 : : total_pages * sizeof(dsa_pointer);
1298 : : /* Add padding up to next page boundary. */
1299 [ + - ]: 1619 : if (metadata_bytes % FPM_PAGE_SIZE != 0)
1300 : 1619 : metadata_bytes += FPM_PAGE_SIZE - (metadata_bytes % FPM_PAGE_SIZE);
1301 [ - + ]: 1619 : Assert(metadata_bytes <= size);
1302 : 1619 : usable_pages = (size - metadata_bytes) / FPM_PAGE_SIZE;
1303 : :
1304 : : /*
1305 : : * Initialize the dsa_area_control object located at the start of the
1306 : : * space.
1307 : : */
1308 : 1619 : control = (dsa_area_control *) place;
1964 tmunro@postgresql.or 1309 : 1619 : memset(place, 0, sizeof(*control));
3301 rhaas@postgresql.org 1310 : 1619 : control->segment_header.magic =
1311 : 1619 : DSA_SEGMENT_HEADER_MAGIC ^ control_handle ^ 0;
1312 : 1619 : control->segment_header.next = DSA_SEGMENT_INDEX_NONE;
1313 : 1619 : control->segment_header.prev = DSA_SEGMENT_INDEX_NONE;
1314 : 1619 : control->segment_header.usable_pages = usable_pages;
1315 : 1619 : control->segment_header.freed = false;
629 msawada@postgresql.o 1316 : 1619 : control->segment_header.size = size;
3301 rhaas@postgresql.org 1317 : 1619 : control->handle = control_handle;
629 msawada@postgresql.o 1318 : 1619 : control->init_segment_size = init_segment_size;
1319 : 1619 : control->max_segment_size = max_segment_size;
2642 tmunro@postgresql.or 1320 : 1619 : control->max_total_segment_size = (size_t) -1;
3301 rhaas@postgresql.org 1321 : 1619 : control->total_segment_size = size;
1322 : 1619 : control->segment_handles[0] = control_handle;
1323 [ + + ]: 27523 : for (i = 0; i < DSA_NUM_SEGMENT_BINS; ++i)
1324 : 25904 : control->segment_bins[i] = DSA_SEGMENT_INDEX_NONE;
1325 : 1619 : control->refcnt = 1;
1326 : 1619 : control->lwlock_tranche_id = tranche_id;
1327 : :
1328 : : /*
1329 : : * Create the dsa_area object that this backend will use to access the
1330 : : * area. Other backends will need to obtain their own dsa_area object by
1331 : : * attaching.
1332 : : */
6 michael@paquier.xyz 1333 :GNC 1619 : area = palloc_object(dsa_area);
3301 rhaas@postgresql.org 1334 :CBC 1619 : area->control = control;
762 heikki.linnakangas@i 1335 : 1619 : area->resowner = CurrentResourceOwner;
3301 rhaas@postgresql.org 1336 : 1619 : memset(area->segment_maps, 0, sizeof(dsa_segment_map) * DSA_MAX_SEGMENTS);
1337 : 1619 : area->high_segment_index = 0;
3172 andres@anarazel.de 1338 : 1619 : area->freed_segment_counter = 0;
3301 rhaas@postgresql.org 1339 : 1619 : LWLockInitialize(&control->lock, control->lwlock_tranche_id);
1340 [ + + ]: 63141 : for (i = 0; i < DSA_NUM_SIZE_CLASSES; ++i)
1341 : 61522 : LWLockInitialize(DSA_SCLASS_LOCK(area, i),
1342 : : control->lwlock_tranche_id);
1343 : :
1344 : : /* Set up the segment map for this process's mapping. */
1345 : 1619 : segment_map = &area->segment_maps[0];
1346 : 1619 : segment_map->segment = control_segment;
1347 : 1619 : segment_map->mapped_address = place;
1348 : 1619 : segment_map->header = (dsa_segment_header *) place;
1349 : 1619 : segment_map->fpm = (FreePageManager *)
1350 : 1619 : (segment_map->mapped_address +
1351 : : MAXALIGN(sizeof(dsa_area_control)));
1352 : 1619 : segment_map->pagemap = (dsa_pointer *)
1353 : 1619 : (segment_map->mapped_address +
1354 : 1619 : MAXALIGN(sizeof(dsa_area_control)) +
1355 : : MAXALIGN(sizeof(FreePageManager)));
1356 : :
1357 : : /* Set up the free page map. */
1358 : 1619 : FreePageManagerInitialize(segment_map->fpm, segment_map->mapped_address);
1359 : : /* There can be 0 usable pages if size is dsa_minimum_size(). */
1360 : :
1361 [ + + ]: 1619 : if (usable_pages > 0)
1362 : 1241 : FreePageManagerPut(segment_map->fpm, metadata_bytes / FPM_PAGE_SIZE,
1363 : : usable_pages);
1364 : :
1365 : : /* Put this segment into the appropriate bin. */
1366 : 1619 : control->segment_bins[contiguous_pages_to_segment_bin(usable_pages)] = 0;
1367 : 1619 : segment_map->header->bin = contiguous_pages_to_segment_bin(usable_pages);
1368 : :
1369 : 1619 : return area;
1370 : : }
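
/*
 * Illustrative aside (hypothetical caller code, not part of dsa.c): the
 * public entry points that lead here are dsa_create() and
 * dsa_create_in_place().  A minimal create/allocate/free round trip:
 */
static void
example_create_allocate_free(int my_tranche_id)
{
	dsa_area   *area = dsa_create(my_tranche_id);
	dsa_pointer dp = dsa_allocate(area, 64);
	char	   *p = (char *) dsa_get_address(area, dp);

	memset(p, 0, 64);			/* use the backend-local address... */

	/* ...but share the dsa_pointer 'dp', never 'p', with other backends. */
	dsa_free(area, dp);
	dsa_detach(area);
}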
1371 : :
1372 : : /*
1373 : : * Workhorse function for dsa_attach and dsa_attach_in_place.
1374 : : */
1375 : : static dsa_area *
1376 : 22726 : attach_internal(void *place, dsm_segment *segment, dsa_handle handle)
1377 : : {
1378 : : dsa_area_control *control;
1379 : : dsa_area *area;
1380 : : dsa_segment_map *segment_map;
1381 : :
1382 : 22726 : control = (dsa_area_control *) place;
1383 [ - + ]: 22726 : Assert(control->handle == handle);
1384 [ - + ]: 22726 : Assert(control->segment_handles[0] == handle);
1385 [ - + ]: 22726 : Assert(control->segment_header.magic ==
1386 : : (DSA_SEGMENT_HEADER_MAGIC ^ handle ^ 0));
1387 : :
1388 : : /* Build the backend-local area object. */
6 michael@paquier.xyz 1389 :GNC 22726 : area = palloc_object(dsa_area);
3301 rhaas@postgresql.org 1390 :CBC 22726 : area->control = control;
762 heikki.linnakangas@i 1391 : 22726 : area->resowner = CurrentResourceOwner;
3301 rhaas@postgresql.org 1392 : 22726 : memset(&area->segment_maps[0], 0,
1393 : : sizeof(dsa_segment_map) * DSA_MAX_SEGMENTS);
1394 : 22726 : area->high_segment_index = 0;
1395 : :
1396 : : /* Set up the segment map for this process's mapping. */
1397 : 22726 : segment_map = &area->segment_maps[0];
3100 tgl@sss.pgh.pa.us 1398 : 22726 : segment_map->segment = segment; /* NULL for in-place */
3301 rhaas@postgresql.org 1399 : 22726 : segment_map->mapped_address = place;
1400 : 22726 : segment_map->header = (dsa_segment_header *) segment_map->mapped_address;
1401 : 22726 : segment_map->fpm = (FreePageManager *)
1402 : 22726 : (segment_map->mapped_address + MAXALIGN(sizeof(dsa_area_control)));
1403 : 22726 : segment_map->pagemap = (dsa_pointer *)
1404 : 22726 : (segment_map->mapped_address + MAXALIGN(sizeof(dsa_area_control)) +
1405 : : MAXALIGN(sizeof(FreePageManager)));
1406 : :
1407 : : /* Bump the reference count. */
1408 : 22726 : LWLockAcquire(DSA_AREA_LOCK(area), LW_EXCLUSIVE);
3184 1409 [ - + ]: 22726 : if (control->refcnt == 0)
1410 : : {
1411 : : /* We can't attach to a DSA area that has already been destroyed. */
3184 rhaas@postgresql.org 1412 [ # # ]:UBC 0 : ereport(ERROR,
1413 : : (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
1414 : : errmsg("could not attach to dynamic shared area")));
1415 : : }
3301 rhaas@postgresql.org 1416 :CBC 22726 : ++control->refcnt;
3172 andres@anarazel.de 1417 : 22726 : area->freed_segment_counter = area->control->freed_segment_counter;
3301 rhaas@postgresql.org 1418 : 22726 : LWLockRelease(DSA_AREA_LOCK(area));
1419 : :
1420 : 22726 : return area;
1421 : : }
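
/*
 * Illustrative aside (hypothetical caller code): another backend attaches
 * using the dsa_handle it received out of band (for example via a shm_toc),
 * which arrives here through dsa_attach() and bumps refcnt above.
 */
static void
example_attach_and_read(dsa_handle handle, dsa_pointer dp)
{
	dsa_area   *area = dsa_attach(handle);
	char	   *p = (char *) dsa_get_address(area, dp);

	Assert(p != NULL);
	/* ... read or write the shared object through 'p' ... */

	dsa_detach(area);
}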
1422 : :
1423 : : /*
1424 : : * Add a new span to fullness class 1 of the indicated pool.
1425 : : */
1426 : : static void
1427 : 13055 : init_span(dsa_area *area,
1428 : : dsa_pointer span_pointer,
1429 : : dsa_area_pool *pool, dsa_pointer start, size_t npages,
1430 : : uint16 size_class)
1431 : : {
1432 : 13055 : dsa_area_span *span = dsa_get_address(area, span_pointer);
2642 tmunro@postgresql.or 1433 : 13055 : size_t obsize = dsa_size_classes[size_class];
1434 : :
1435 : : /*
1436 : : * The per-pool lock must be held because we manipulate the span list for
1437 : : * this pool.
1438 : : */
3301 rhaas@postgresql.org 1439 [ - + ]: 13055 : Assert(LWLockHeldByMe(DSA_SCLASS_LOCK(area, size_class)));
1440 : :
1441 : : /* Push this span onto the front of the span list for fullness class 1. */
1442 [ + + ]: 13055 : if (DsaPointerIsValid(pool->spans[1]))
1443 : : {
1444 : : dsa_area_span *head = (dsa_area_span *)
942 tgl@sss.pgh.pa.us 1445 : 1997 : dsa_get_address(area, pool->spans[1]);
1446 : :
3301 rhaas@postgresql.org 1447 : 1997 : head->prevspan = span_pointer;
1448 : : }
1449 : 13055 : span->pool = DsaAreaPoolToDsaPointer(area, pool);
1450 : 13055 : span->nextspan = pool->spans[1];
1451 : 13055 : span->prevspan = InvalidDsaPointer;
1452 : 13055 : pool->spans[1] = span_pointer;
1453 : :
1454 : 13055 : span->start = start;
1455 : 13055 : span->npages = npages;
1456 : 13055 : span->size_class = size_class;
1457 : 13055 : span->ninitialized = 0;
1458 [ + + ]: 13055 : if (size_class == DSA_SCLASS_BLOCK_OF_SPANS)
1459 : : {
1460 : : /*
1461 : : * A block-of-spans contains its own descriptor, so mark one object as
1462 : : * initialized and reduce the count of allocatable objects by one.
1463 : : * Doing this here has the side effect of also reducing nmax by one,
1464 : : * which is important to make sure we free this object at the correct
1465 : : * time.
1466 : : */
1467 : 1327 : span->ninitialized = 1;
1468 : 1327 : span->nallocatable = FPM_PAGE_SIZE / obsize - 1;
1469 : : }
1470 [ + + ]: 11728 : else if (size_class != DSA_SCLASS_SPAN_LARGE)
1471 : 8952 : span->nallocatable = DSA_SUPERBLOCK_SIZE / obsize;
1472 : 13055 : span->firstfree = DSA_SPAN_NOTHING_FREE;
1473 : 13055 : span->nmax = span->nallocatable;
1474 : 13055 : span->fclass = 1;
1475 : 13055 : }
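
/*
 * Illustrative aside: with the sizes given in this file's header comment
 * (4KB pages, 16-page / 64KB superblocks), the capacity arithmetic above
 * works out as sketched below, using hypothetical names.  A block-of-spans
 * keeps one object back because its own descriptor lives in the block.
 */
static size_t
example_span_capacity(size_t obsize, bool is_block_of_spans)
{
	if (is_block_of_spans)
		return 4096 / obsize - 1;	/* one page; first object is the span */
	return 65536 / obsize;		/* a full 16-page superblock */
}

/* e.g. example_span_capacity(64, false) == 1024 objects per span. */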
1476 : :
1477 : : /*
1478 : : * Transfer the first span in one fullness class to the head of another
1479 : : * fullness class.
1480 : : */
1481 : : static bool
1482 : 21666 : transfer_first_span(dsa_area *area,
1483 : : dsa_area_pool *pool, int fromclass, int toclass)
1484 : : {
1485 : : dsa_pointer span_pointer;
1486 : : dsa_area_span *span;
1487 : : dsa_area_span *nextspan;
1488 : :
1489 : : /* Can't do it if source list is empty. */
1490 : 21666 : span_pointer = pool->spans[fromclass];
1491 [ + + ]: 21666 : if (!DsaPointerIsValid(span_pointer))
1492 : 20560 : return false;
1493 : :
1494 : : /* Remove span from head of source list. */
1495 : 1106 : span = dsa_get_address(area, span_pointer);
1496 : 1106 : pool->spans[fromclass] = span->nextspan;
1497 [ + + ]: 1106 : if (DsaPointerIsValid(span->nextspan))
1498 : : {
1499 : : nextspan = (dsa_area_span *)
1500 : 53 : dsa_get_address(area, span->nextspan);
1501 : 53 : nextspan->prevspan = InvalidDsaPointer;
1502 : : }
1503 : :
1504 : : /* Add span to head of target list. */
1505 : 1106 : span->nextspan = pool->spans[toclass];
1506 : 1106 : pool->spans[toclass] = span_pointer;
1507 [ + + ]: 1106 : if (DsaPointerIsValid(span->nextspan))
1508 : : {
1509 : : nextspan = (dsa_area_span *)
1510 : 356 : dsa_get_address(area, span->nextspan);
1511 : 356 : nextspan->prevspan = span_pointer;
1512 : : }
1513 : 1106 : span->fclass = toclass;
1514 : :
1515 : 1106 : return true;
1516 : : }
1517 : :
1518 : : /*
1519 : : * Allocate one object of the requested size class from the given area.
1520 : : */
1521 : : static inline dsa_pointer
1522 : 656256 : alloc_object(dsa_area *area, int size_class)
1523 : : {
1524 : 656256 : dsa_area_pool *pool = &area->control->pools[size_class];
1525 : : dsa_area_span *span;
1526 : : dsa_pointer block;
1527 : : dsa_pointer result;
1528 : : char *object;
1529 : : size_t size;
1530 : :
1531 : : /*
1532 : : * Even though ensure_active_superblock can in turn call alloc_object if
1533 : : * it needs to allocate a new span, that's always from a different pool,
1534 : : * and the order of lock acquisition is always the same, so it's OK that
1535 : : * we hold this lock for the duration of this function.
1536 : : */
1537 [ - + ]: 656256 : Assert(!LWLockHeldByMe(DSA_SCLASS_LOCK(area, size_class)));
1538 : 656256 : LWLockAcquire(DSA_SCLASS_LOCK(area, size_class), LW_EXCLUSIVE);
1539 : :
1540 : : /*
1541 : : * If there's no active superblock, we must successfully obtain one or
1542 : : * fail the request.
1543 : : */
1544 [ + + ]: 656256 : if (!DsaPointerIsValid(pool->spans[1]) &&
1545 [ - + ]: 10366 : !ensure_active_superblock(area, pool, size_class))
1546 : : {
3301 rhaas@postgresql.org 1547 :UBC 0 : result = InvalidDsaPointer;
1548 : : }
1549 : : else
1550 : : {
1551 : : /*
1552 : : * There should be a block in fullness class 1 at this point, and it
1553 : : * should never be completely full. Thus we can either pop an object
1554 : : * from the free list or, failing that, initialize a new object.
1555 : : */
3301 rhaas@postgresql.org 1556 [ - + ]:CBC 656256 : Assert(DsaPointerIsValid(pool->spans[1]));
1557 : : span = (dsa_area_span *)
1558 : 656256 : dsa_get_address(area, pool->spans[1]);
1559 [ - + ]: 656256 : Assert(span->nallocatable > 0);
1560 : 656256 : block = span->start;
1561 [ - + ]: 656256 : Assert(size_class < DSA_NUM_SIZE_CLASSES);
1562 : 656256 : size = dsa_size_classes[size_class];
1563 [ + + ]: 656256 : if (span->firstfree != DSA_SPAN_NOTHING_FREE)
1564 : : {
1565 : 99096 : result = block + span->firstfree * size;
1566 : 99096 : object = dsa_get_address(area, result);
1567 : 99096 : span->firstfree = NextFreeObjectIndex(object);
1568 : : }
1569 : : else
1570 : : {
1571 : 557160 : result = block + span->ninitialized * size;
1572 : 557160 : ++span->ninitialized;
1573 : : }
1574 : 656256 : --span->nallocatable;
1575 : :
1576 : : /* If it's now full, move it to the highest-numbered fullness class. */
1577 [ + + ]: 656256 : if (span->nallocatable == 0)
1578 : 1021 : transfer_first_span(area, pool, 1, DSA_FULLNESS_CLASSES - 1);
1579 : : }
1580 : :
1581 [ - + ]: 656256 : Assert(LWLockHeldByMe(DSA_SCLASS_LOCK(area, size_class)));
1582 : 656256 : LWLockRelease(DSA_SCLASS_LOCK(area, size_class));
1583 : :
1584 : 656256 : return result;
1585 : : }
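
/*
 * Illustrative aside: the firstfree/NextFreeObjectIndex() dance above is an
 * intrusive free list; a free object's first bytes store the index of the
 * next free object.  A hedged standalone sketch, assuming a 16-bit index as
 * the span's uint16 fields suggest:
 */
static inline uint16
example_pop_free_object(char *block, size_t obsize, uint16 *firstfree)
{
	uint16		index = *firstfree;
	char	   *object = block + (size_t) index * obsize;

	/* While an object is free, its payload doubles as the list link. */
	*firstfree = *(uint16 *) object;
	return index;
}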
1586 : :
1587 : : /*
1588 : : * Ensure an active (i.e. fullness class 1) superblock, unless all existing
1589 : : * superblocks are completely full and no more can be allocated.
1590 : : *
1591 : : * Fullness classes K of 0..N are loosely intended to represent blocks whose
1592 : : * utilization percentage is at least K/N, but we only enforce this rigorously
1593 : : * for the highest-numbered fullness class, which always contains exactly
1594 : : * those blocks that are completely full. It's otherwise acceptable for a
1595 : : * block to be in a higher-numbered fullness class than the one to which it
1596 : : * logically belongs. In addition, the active block, which is always the
1597 : : * first block in fullness class 1, is permitted to have a higher allocation
1598 : : * percentage than would normally be allowable for that fullness class; we
1599 : : * don't move it until it's completely full, and then it goes to the
1600 : : * highest-numbered fullness class.
1601 : : *
1602 : : * It might seem odd that the active block is the head of fullness class 1
1603 : : * rather than fullness class 0, but experience with other allocators has
1604 : : * shown that it's usually better to allocate from a block that's moderately
1605 : : * full rather than one that's nearly empty. Insofar as is reasonably
1606 : : * possible, we want to avoid performing new allocations in a block that would
1607 : : * otherwise become empty soon.
1608 : : */
1609 : : static bool
1610 : 10366 : ensure_active_superblock(dsa_area *area, dsa_area_pool *pool,
1611 : : int size_class)
1612 : : {
1613 : : dsa_pointer span_pointer;
1614 : : dsa_pointer start_pointer;
2642 tmunro@postgresql.or 1615 : 10366 : size_t obsize = dsa_size_classes[size_class];
1616 : : size_t nmax;
1617 : : int fclass;
1618 : 10366 : size_t npages = 1;
1619 : : size_t first_page;
1620 : : size_t i;
1621 : : dsa_segment_map *segment_map;
1622 : :
3301 rhaas@postgresql.org 1623 [ - + ]: 10366 : Assert(LWLockHeldByMe(DSA_SCLASS_LOCK(area, size_class)));
1624 : :
1625 : : /*
1626 : : * Compute the number of objects that will fit in a block of this size
1627 : : * class. Span-of-spans blocks are just a single page, and the first
1628 : : * object isn't available for use because it describes the block-of-spans
1629 : : * itself.
1630 : : */
1631 [ + + ]: 10366 : if (size_class == DSA_SCLASS_BLOCK_OF_SPANS)
1632 : 1327 : nmax = FPM_PAGE_SIZE / obsize - 1;
1633 : : else
1634 : 9039 : nmax = DSA_SUPERBLOCK_SIZE / obsize;
1635 : :
1636 : : /*
1637 : : * If fullness class 1 is empty, try to find a span to put in it by
1638 : : * scanning higher-numbered fullness classes (excluding the last one,
1639 : : * whose blocks are certain to all be completely full).
1640 : : */
1641 [ + + ]: 20730 : for (fclass = 2; fclass < DSA_FULLNESS_CLASSES - 1; ++fclass)
1642 : : {
1643 : 10366 : span_pointer = pool->spans[fclass];
1644 : :
1645 [ + + ]: 10536 : while (DsaPointerIsValid(span_pointer))
1646 : : {
1647 : : int tfclass;
1648 : : dsa_area_span *span;
1649 : : dsa_area_span *nextspan;
1650 : : dsa_area_span *prevspan;
1651 : : dsa_pointer next_span_pointer;
1652 : :
1653 : : span = (dsa_area_span *)
1654 : 170 : dsa_get_address(area, span_pointer);
1655 : 170 : next_span_pointer = span->nextspan;
1656 : :
1657 : : /* Figure out what fullness class should contain this span. */
1658 : 170 : tfclass = (nmax - span->nallocatable)
1659 : 170 : * (DSA_FULLNESS_CLASSES - 1) / nmax;
1660 : :
1661 : : /* Look up next span. */
1662 [ + + ]: 170 : if (DsaPointerIsValid(span->nextspan))
1663 : : nextspan = (dsa_area_span *)
1664 : 84 : dsa_get_address(area, span->nextspan);
1665 : : else
1666 : 86 : nextspan = NULL;
1667 : :
1668 : : /*
1669 : : * If utilization has dropped enough that this now belongs in some
1670 : : * other fullness class, move it there.
1671 : : */
1672 [ + + ]: 170 : if (tfclass < fclass)
1673 : : {
1674 : : /* Remove from the current fullness class list. */
1675 [ + - ]: 4 : if (pool->spans[fclass] == span_pointer)
1676 : : {
1677 : : /* It was the head; remove it. */
1678 [ - + ]: 4 : Assert(!DsaPointerIsValid(span->prevspan));
1679 : 4 : pool->spans[fclass] = span->nextspan;
1680 [ + + ]: 4 : if (nextspan != NULL)
1681 : 1 : nextspan->prevspan = InvalidDsaPointer;
1682 : : }
1683 : : else
1684 : : {
1685 : : /* It was not the head. */
3301 rhaas@postgresql.org 1686 [ # # ]:UBC 0 : Assert(DsaPointerIsValid(span->prevspan));
1687 : : prevspan = (dsa_area_span *)
1688 : 0 : dsa_get_address(area, span->prevspan);
1689 : 0 : prevspan->nextspan = span->nextspan;
1690 : : }
3301 rhaas@postgresql.org 1691 [ + + ]:CBC 4 : if (nextspan != NULL)
1692 : 1 : nextspan->prevspan = span->prevspan;
1693 : :
1694 : : /* Push onto the head of the new fullness class list. */
1695 : 4 : span->nextspan = pool->spans[tfclass];
1696 : 4 : pool->spans[tfclass] = span_pointer;
1697 : 4 : span->prevspan = InvalidDsaPointer;
1698 [ + + ]: 4 : if (DsaPointerIsValid(span->nextspan))
1699 : : {
1700 : : nextspan = (dsa_area_span *)
1701 : 1 : dsa_get_address(area, span->nextspan);
1702 : 1 : nextspan->prevspan = span_pointer;
1703 : : }
1704 : 4 : span->fclass = tfclass;
1705 : : }
1706 : :
1707 : : /* Advance to next span on list. */
1708 : 170 : span_pointer = next_span_pointer;
1709 : : }
1710 : :
1711 : : /* Stop now if we found a suitable block. */
1712 [ + + ]: 10366 : if (DsaPointerIsValid(pool->spans[1]))
3301 rhaas@postgresql.org 1713 :GBC 2 : return true;
1714 : : }
1715 : :
1716 : : /*
1717 : : * If there are no blocks that properly belong in fullness class 1, pick
1718 : : * one from some other fullness class and move it there anyway, so that we
1719 : : * have an allocation target. Our last choice is to transfer a block
1720 : : * that's almost empty (and might become completely empty soon if left
1721 : : * alone), but even that is better than failing, which is what we must do
1722 : : * if there are no blocks at all with freespace.
1723 : : */
3301 rhaas@postgresql.org 1724 [ - + ]:CBC 10364 : Assert(!DsaPointerIsValid(pool->spans[1]));
1725 [ + + ]: 20645 : for (fclass = 2; fclass < DSA_FULLNESS_CLASSES - 1; ++fclass)
1726 [ + + ]: 10364 : if (transfer_first_span(area, pool, fclass, 1))
1727 : 83 : return true;
1728 [ + - + + ]: 20562 : if (!DsaPointerIsValid(pool->spans[1]) &&
1729 : 10281 : transfer_first_span(area, pool, 0, 1))
1730 : 2 : return true;
1731 : :
1732 : : /*
1733 : : * We failed to find an existing span with free objects, so we need to
1734 : : * allocate a new superblock and construct a new span to manage it.
1735 : : *
1736 : : * First, get a dsa_area_span object to describe the new superblock
1737 : : * ... unless this allocation is for a dsa_area_span object, in which case
1738 : : * that's surely not going to work. We handle that case by storing the
1739 : : * span describing a block-of-spans inline.
1740 : : */
1741 [ + + ]: 10279 : if (size_class != DSA_SCLASS_BLOCK_OF_SPANS)
1742 : : {
1743 : 8952 : span_pointer = alloc_object(area, DSA_SCLASS_BLOCK_OF_SPANS);
1744 [ - + ]: 8952 : if (!DsaPointerIsValid(span_pointer))
3301 rhaas@postgresql.org 1745 :UBC 0 : return false;
3301 rhaas@postgresql.org 1746 :CBC 8952 : npages = DSA_PAGES_PER_SUPERBLOCK;
1747 : : }
1748 : :
1749 : : /* Find or create a segment and allocate the superblock. */
1750 : 10279 : LWLockAcquire(DSA_AREA_LOCK(area), LW_EXCLUSIVE);
1751 : 10279 : segment_map = get_best_segment(area, npages);
1752 [ + + ]: 10279 : if (segment_map == NULL)
1753 : : {
1754 : 1000 : segment_map = make_new_segment(area, npages);
1755 [ - + ]: 1000 : if (segment_map == NULL)
1756 : : {
3301 rhaas@postgresql.org 1757 :UBC 0 : LWLockRelease(DSA_AREA_LOCK(area));
1758 : 0 : return false;
1759 : : }
1760 : : }
1761 : :
1762 : : /*
1763 : : * This shouldn't happen: get_best_segment() or make_new_segment()
1764 : : * promised that we can successfully allocate npages.
1765 : : */
3301 rhaas@postgresql.org 1766 [ - + ]:CBC 10279 : if (!FreePageManagerGet(segment_map->fpm, npages, &first_page))
2486 tmunro@postgresql.or 1767 [ # # ]:UBC 0 : elog(FATAL,
1768 : : "dsa_allocate could not find %zu free pages for superblock",
1769 : : npages);
3301 rhaas@postgresql.org 1770 :CBC 10279 : LWLockRelease(DSA_AREA_LOCK(area));
1771 : :
1772 : : /* Compute the start of the superblock. */
1773 : 10279 : start_pointer =
1774 : 10279 : DSA_MAKE_POINTER(get_segment_index(area, segment_map),
1775 : : first_page * FPM_PAGE_SIZE);
1776 : :
1777 : : /*
1778 : : * If this is a block-of-spans, carve the descriptor right out of the
1779 : : * allocated space.
1780 : : */
1781 [ + + ]: 10279 : if (size_class == DSA_SCLASS_BLOCK_OF_SPANS)
1782 : : {
1783 : : /*
1784 : : * We have a pointer into the segment. We need to build a dsa_pointer
1785 : : * from the segment index and offset into the segment.
1786 : : */
1787 : 1327 : span_pointer = start_pointer;
1788 : : }
1789 : :
1790 : : /* Initialize span and pagemap. */
1791 : 10279 : init_span(area, span_pointer, pool, start_pointer, npages, size_class);
1792 [ + + ]: 154838 : for (i = 0; i < npages; ++i)
1793 : 144559 : segment_map->pagemap[first_page + i] = span_pointer;
1794 : :
1795 : 10279 : return true;
1796 : : }
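
/*
 * Illustrative aside: the re-binning rule above maps a span's utilization
 * linearly onto the fullness classes.  A hedged sketch, assuming
 * DSA_FULLNESS_CLASSES is 4 as in this file:
 */
static int
example_fullness_class(size_t nmax, size_t nallocatable)
{
	/* Same arithmetic as the tfclass computation above. */
	return (int) ((nmax - nallocatable) * (4 - 1) / nmax);
}

/*
 * With nmax = 1024: 0 objects allocated -> class 0, 512 -> class 1,
 * 700 -> class 2, and 1024 (completely full) -> class 3.
 */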
1797 : :
1798 : : /*
1799 : : * Return the segment map corresponding to a given segment index, mapping the
1800 : : * segment in if necessary. For internal segment book-keeping, this is called
1801 : : * with the area lock held. It is also called by dsa_free and dsa_get_address
1802 : : * without any locking, relying on the fact they have a known live segment
1803 : : * index and they always call check_for_freed_segments to ensure that any
1804 : : * freed segment occupying the same slot is detached first.
1805 : : */
1806 : : static dsa_segment_map *
1807 : 150658 : get_segment_by_index(dsa_area *area, dsa_segment_index index)
1808 : : {
1809 [ + + ]: 150658 : if (unlikely(area->segment_maps[index].mapped_address == NULL))
1810 : : {
1811 : : dsm_handle handle;
1812 : : dsm_segment *segment;
1813 : : dsa_segment_map *segment_map;
1814 : : ResourceOwner oldowner;
1815 : :
1816 : : /*
1817 : : * If we are reached by dsa_free or dsa_get_address, there must be at
1818 : : * least one object allocated in the referenced segment. Otherwise,
1819 : : * their caller has a double-free or access-after-free bug, which we
1820 : : * have no hope of detecting. So we know it's safe to access this
1821 : : * array slot without holding a lock; it won't change underneath us.
1822 : : * Furthermore, we know that we can see the latest contents of the
1823 : : * slot, as explained in check_for_freed_segments, which those
1824 : : * functions call before arriving here.
1825 : : */
1826 : 17084 : handle = area->control->segment_handles[index];
1827 : :
1828 : : /* It's an error to try to access an unused slot. */
1829 [ - + ]: 17084 : if (handle == DSM_HANDLE_INVALID)
3301 rhaas@postgresql.org 1830 [ # # ]:UBC 0 : elog(ERROR,
1831 : : "dsa_area could not attach to a segment that has been freed");
1832 : :
762 heikki.linnakangas@i 1833 :CBC 17084 : oldowner = CurrentResourceOwner;
1834 : 17084 : CurrentResourceOwner = area->resowner;
3301 rhaas@postgresql.org 1835 : 17084 : segment = dsm_attach(handle);
762 heikki.linnakangas@i 1836 : 17084 : CurrentResourceOwner = oldowner;
3301 rhaas@postgresql.org 1837 [ - + ]: 17084 : if (segment == NULL)
3301 rhaas@postgresql.org 1838 [ # # ]:UBC 0 : elog(ERROR, "dsa_area could not attach to segment");
3301 rhaas@postgresql.org 1839 :CBC 17084 : segment_map = &area->segment_maps[index];
1840 : 17084 : segment_map->segment = segment;
1841 : 17084 : segment_map->mapped_address = dsm_segment_address(segment);
1842 : 17084 : segment_map->header =
1843 : 17084 : (dsa_segment_header *) segment_map->mapped_address;
1844 : 17084 : segment_map->fpm = (FreePageManager *)
1845 : 17084 : (segment_map->mapped_address +
1846 : : MAXALIGN(sizeof(dsa_segment_header)));
1847 : 17084 : segment_map->pagemap = (dsa_pointer *)
1848 : 17084 : (segment_map->mapped_address +
1849 : 17084 : MAXALIGN(sizeof(dsa_segment_header)) +
1850 : : MAXALIGN(sizeof(FreePageManager)));
1851 : :
1852 : : /* Remember the highest index this backend has ever mapped. */
1853 [ + + ]: 17084 : if (area->high_segment_index < index)
1854 : 16998 : area->high_segment_index = index;
1855 : :
1856 [ - + ]: 17084 : Assert(segment_map->header->magic ==
1857 : : (DSA_SEGMENT_HEADER_MAGIC ^ area->control->handle ^ index));
1858 : : }
1859 : :
1860 : : /*
1861 : : * Callers of dsa_get_address() and dsa_free() don't hold the area lock,
1862 : : * but it's a bug in the calling code and undefined behavior if the
1863 : : * address is not live (ie if the segment might possibly have been freed,
1864 : : * they're trying to use a dangling pointer).
1865 : : *
1866 : : * For dsa.c code that holds the area lock to manipulate segment_bins
1867 : : * lists, it would be a bug if we ever reach a freed segment here. After
1868 : : * it's marked as freed, the only thing any backend should do with it is
1869 : : * unmap it, and it should always have done that in
1870 : : * check_for_freed_segments_locked() before arriving here to resolve an
1871 : : * index to a segment_map.
1872 : : *
1873 : : * Either way we can assert that we aren't returning a freed segment.
1874 : : */
2644 tmunro@postgresql.or 1875 [ - + ]: 150658 : Assert(!area->segment_maps[index].header->freed);
1876 : :
3301 rhaas@postgresql.org 1877 : 150658 : return &area->segment_maps[index];
1878 : : }
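
/*
 * Illustrative aside: a dsa_pointer packs (segment index, byte offset); the
 * exact field widths are private to dsa.c.  A hedged re-creation of the
 * DSA_MAKE_POINTER / DSA_EXTRACT_* pattern, assuming a 40-bit offset field
 * (an assumption, not a statement of this file's actual layout):
 */
typedef uint64 example_dsa_pointer;

#define EXAMPLE_OFFSET_WIDTH	40
#define EXAMPLE_MAKE_POINTER(seg, off) \
	(((example_dsa_pointer) (seg) << EXAMPLE_OFFSET_WIDTH) | (off))
#define EXAMPLE_SEGMENT(dp)		((dp) >> EXAMPLE_OFFSET_WIDTH)
#define EXAMPLE_OFFSET(dp) \
	((dp) & ((((example_dsa_pointer) 1) << EXAMPLE_OFFSET_WIDTH) - 1))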
1879 : :
1880 : : /*
1881 : : * Return a superblock to the free page manager. If the underlying segment
1882 : : * has become entirely free, then return it to the operating system.
1883 : : *
1884 : : * The appropriate pool lock must be held.
1885 : : */
1886 : : static void
1887 : 8 : destroy_superblock(dsa_area *area, dsa_pointer span_pointer)
1888 : : {
1889 : 8 : dsa_area_span *span = dsa_get_address(area, span_pointer);
1890 : 8 : int size_class = span->size_class;
1891 : : dsa_segment_map *segment_map;
1892 : :
1893 : :
1894 : : /* Remove it from its fullness class list. */
1895 : 8 : unlink_span(area, span);
1896 : :
1897 : : /*
1898 : : * Note: Here we acquire the area lock while we already hold a per-pool
1899 : : * lock. We never hold the area lock and then take a pool lock, or we
1900 : : * could deadlock.
1901 : : */
1902 : 8 : LWLockAcquire(DSA_AREA_LOCK(area), LW_EXCLUSIVE);
2644 tmunro@postgresql.or 1903 : 8 : check_for_freed_segments_locked(area);
1904 : : segment_map =
1905 : 8 : get_segment_by_index(area, DSA_EXTRACT_SEGMENT_NUMBER(span->start));
3301 rhaas@postgresql.org 1906 : 8 : FreePageManagerPut(segment_map->fpm,
1907 : 8 : DSA_EXTRACT_OFFSET(span->start) / FPM_PAGE_SIZE,
1908 : : span->npages);
1909 : : /* Check if the segment is now entirely free. */
1910 [ - + ]: 8 : if (fpm_largest(segment_map->fpm) == segment_map->header->usable_pages)
1911 : : {
3301 rhaas@postgresql.org 1912 :UBC 0 : dsa_segment_index index = get_segment_index(area, segment_map);
1913 : :
1914 : : /* If it's not the segment with extra control data, free it. */
1915 [ # # ]: 0 : if (index != 0)
1916 : : {
1917 : : /*
1918 : : * Give it back to the OS, and allow other backends to detect that
1919 : : * they need to detach.
1920 : : */
1921 : 0 : unlink_segment(area, segment_map);
1922 : 0 : segment_map->header->freed = true;
1923 [ # # ]: 0 : Assert(area->control->total_segment_size >=
1924 : : segment_map->header->size);
1925 : 0 : area->control->total_segment_size -=
1926 : 0 : segment_map->header->size;
1927 : 0 : dsm_unpin_segment(dsm_segment_handle(segment_map->segment));
1928 : 0 : dsm_detach(segment_map->segment);
1929 : 0 : area->control->segment_handles[index] = DSM_HANDLE_INVALID;
1930 : 0 : ++area->control->freed_segment_counter;
1931 : 0 : segment_map->segment = NULL;
1932 : 0 : segment_map->header = NULL;
1933 : 0 : segment_map->mapped_address = NULL;
1934 : : }
1935 : : }
1936 : :
1937 : : /* Move segment to appropriate bin if necessary. */
896 tmunro@postgresql.or 1938 [ + - ]:CBC 8 : if (segment_map->header != NULL)
1939 : 8 : rebin_segment(area, segment_map);
1940 : :
3301 rhaas@postgresql.org 1941 : 8 : LWLockRelease(DSA_AREA_LOCK(area));
1942 : :
1943 : : /*
1944 : : * Span-of-spans blocks store the span which describes them within the
1945 : : * block itself, so freeing the storage implicitly frees the descriptor
1946 : : * also. If this is a block of any other type, we need to separately free
1947 : : * the span object also. This recursive call to dsa_free will acquire the
1948 : : * span pool's lock. We can't deadlock because the acquisition order is
1949 : : * always some other pool and then the span pool.
1950 : : */
1951 [ + - ]: 8 : if (size_class != DSA_SCLASS_BLOCK_OF_SPANS)
1952 : 8 : dsa_free(area, span_pointer);
1953 : 8 : }
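
/*
 * Illustrative aside: the deadlock-avoidance rule noted above is a fixed
 * acquisition order, per-pool (size-class) lock before area lock.  A hedged
 * sketch of the only legal nesting:
 */
static void
example_lock_order(dsa_area *area, int size_class)
{
	LWLockAcquire(DSA_SCLASS_LOCK(area, size_class), LW_EXCLUSIVE);
	LWLockAcquire(DSA_AREA_LOCK(area), LW_EXCLUSIVE);	/* never the reverse */

	LWLockRelease(DSA_AREA_LOCK(area));
	LWLockRelease(DSA_SCLASS_LOCK(area, size_class));
}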
1954 : :
1955 : : static void
1956 : 2282 : unlink_span(dsa_area *area, dsa_area_span *span)
1957 : : {
1958 [ + + ]: 2282 : if (DsaPointerIsValid(span->nextspan))
1959 : : {
1960 : 1836 : dsa_area_span *next = dsa_get_address(area, span->nextspan);
1961 : :
1962 : 1836 : next->prevspan = span->prevspan;
1963 : : }
1964 [ + + ]: 2282 : if (DsaPointerIsValid(span->prevspan))
1965 : : {
1966 : 1218 : dsa_area_span *prev = dsa_get_address(area, span->prevspan);
1967 : :
1968 : 1218 : prev->nextspan = span->nextspan;
1969 : : }
1970 : : else
1971 : : {
1972 : 1064 : dsa_area_pool *pool = dsa_get_address(area, span->pool);
1973 : :
1974 : 1064 : pool->spans[span->fclass] = span->nextspan;
1975 : : }
1976 : 2282 : }
1977 : :
1978 : : static void
1979 : 149 : add_span_to_fullness_class(dsa_area *area, dsa_area_span *span,
1980 : : dsa_pointer span_pointer,
1981 : : int fclass)
1982 : : {
1983 : 149 : dsa_area_pool *pool = dsa_get_address(area, span->pool);
1984 : :
1985 [ + + ]: 149 : if (DsaPointerIsValid(pool->spans[fclass]))
1986 : : {
1987 : 68 : dsa_area_span *head = dsa_get_address(area,
1988 : : pool->spans[fclass]);
1989 : :
1990 : 68 : head->prevspan = span_pointer;
1991 : : }
1992 : 149 : span->prevspan = InvalidDsaPointer;
1993 : 149 : span->nextspan = pool->spans[fclass];
1994 : 149 : pool->spans[fclass] = span_pointer;
1995 : 149 : span->fclass = fclass;
1996 : 149 : }
1997 : :
1998 : : /*
1999 : : * Detach from an area that was either created or attached to by this process.
2000 : : */
2001 : : void
2002 : 23971 : dsa_detach(dsa_area *area)
2003 : : {
2004 : : int i;
2005 : :
2006 : : /* Detach from all segments. */
2007 [ + + ]: 66039 : for (i = 0; i <= area->high_segment_index; ++i)
2008 [ + + ]: 42068 : if (area->segment_maps[i].segment != NULL)
2009 : 18150 : dsm_detach(area->segment_maps[i].segment);
2010 : :
2011 : : /*
2012 : : * Note that 'detaching' (= detaching from DSM segments) doesn't include
2013 : : * 'releasing' (= adjusting the reference count). It would be nice to
2014 : : * combine these operations, but client code might never get around to
2015 : : * calling dsa_detach because of an error path, and a detach hook on any
2016 : : * particular segment is too late to detach other segments in the area
2017 : : * without risking a 'leak' warning in the non-error path.
2018 : : */
2019 : :
2020 : : /* Free the backend-local area object. */
2021 : 23971 : pfree(area);
2022 : 23971 : }
2023 : :
2024 : : /*
2025 : : * Unlink a segment from the bin that contains it.
2026 : : */
2027 : : static void
2028 : 2288 : unlink_segment(dsa_area *area, dsa_segment_map *segment_map)
2029 : : {
2030 [ + + ]: 2288 : if (segment_map->header->prev != DSA_SEGMENT_INDEX_NONE)
2031 : : {
2032 : : dsa_segment_map *prev;
2033 : :
2034 : 1 : prev = get_segment_by_index(area, segment_map->header->prev);
2035 : 1 : prev->header->next = segment_map->header->next;
2036 : : }
2037 : : else
2038 : : {
2039 [ - + ]: 2287 : Assert(area->control->segment_bins[segment_map->header->bin] ==
2040 : : get_segment_index(area, segment_map));
2041 : 2287 : area->control->segment_bins[segment_map->header->bin] =
2042 : 2287 : segment_map->header->next;
2043 : : }
2044 [ - + ]: 2288 : if (segment_map->header->next != DSA_SEGMENT_INDEX_NONE)
2045 : : {
2046 : : dsa_segment_map *next;
2047 : :
3301 rhaas@postgresql.org 2048 :UBC 0 : next = get_segment_by_index(area, segment_map->header->next);
2049 : 0 : next->header->prev = segment_map->header->prev;
2050 : : }
3301 rhaas@postgresql.org 2051 :CBC 2288 : }
2052 : :
2053 : : /*
2054 : : * Find a segment that could satisfy a request for 'npages' of contiguous
2055 : : * memory, or return NULL if none can be found. This may involve attaching to
2056 : : * segments that weren't previously attached so that we can query their free
2057 : : * page maps.
2058 : : */
2059 : : static dsa_segment_map *
2642 tmunro@postgresql.or 2060 : 13055 : get_best_segment(dsa_area *area, size_t npages)
2061 : : {
2062 : : size_t bin;
2063 : :
3301 rhaas@postgresql.org 2064 [ - + ]: 13055 : Assert(LWLockHeldByMe(DSA_AREA_LOCK(area)));
2644 tmunro@postgresql.or 2065 : 13055 : check_for_freed_segments_locked(area);
2066 : :
2067 : : /*
2068 : : * Start searching from the first bin that *might* have enough contiguous
2069 : : * pages.
2070 : : */
3301 rhaas@postgresql.org 2071 : 13055 : for (bin = contiguous_pages_to_segment_bin(npages);
2072 [ + + ]: 56694 : bin < DSA_NUM_SEGMENT_BINS;
2073 : 43639 : ++bin)
2074 : : {
2075 : : /*
2076 : : * The minimum contiguous size that any segment in this bin should
2077 : : * have. We'll re-bin if we see segments with fewer.
2078 : : */
2642 tmunro@postgresql.or 2079 : 55671 : size_t threshold = (size_t) 1 << (bin - 1);
2080 : : dsa_segment_index segment_index;
2081 : :
2082 : : /* Search this bin for a segment with enough contiguous space. */
3301 rhaas@postgresql.org 2083 : 55671 : segment_index = area->control->segment_bins[bin];
2084 [ + + ]: 56610 : while (segment_index != DSA_SEGMENT_INDEX_NONE)
2085 : : {
2086 : : dsa_segment_map *segment_map;
2087 : : dsa_segment_index next_segment_index;
2088 : : size_t contiguous_pages;
2089 : :
2090 : 12971 : segment_map = get_segment_by_index(area, segment_index);
2091 : 12971 : next_segment_index = segment_map->header->next;
2092 : 12971 : contiguous_pages = fpm_largest(segment_map->fpm);
2093 : :
2094 : : /* Not enough for the request, still enough for this bin. */
2095 [ + + - + ]: 12971 : if (contiguous_pages >= threshold && contiguous_pages < npages)
2096 : : {
3301 rhaas@postgresql.org 2097 :UBC 0 : segment_index = next_segment_index;
2098 : 0 : continue;
2099 : : }
2100 : :
2101 : : /* Re-bin it if it's no longer in the appropriate bin. */
3301 rhaas@postgresql.org 2102 [ + + ]:CBC 12971 : if (contiguous_pages < threshold)
2103 : : {
896 tmunro@postgresql.or 2104 : 2090 : rebin_segment(area, segment_map);
2105 : :
2106 : : /*
2107 : : * But fall through to see if it's enough to satisfy this
2108 : : * request anyway....
2109 : : */
2110 : : }
2111 : :
2112 : : /* Check if we are done. */
3301 rhaas@postgresql.org 2113 [ + + ]: 12971 : if (contiguous_pages >= npages)
2114 : 12032 : return segment_map;
2115 : :
2116 : : /* Continue searching the same bin. */
2117 : 939 : segment_index = next_segment_index;
2118 : : }
2119 : : }
2120 : :
2121 : : /* Not found. */
2122 : 1023 : return NULL;
2123 : : }
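
/*
 * Illustrative aside: segment bins are power-of-two buckets; bin b (b >= 1)
 * is meant to hold segments whose largest run of contiguous free pages is
 * in [2^(b-1), 2^b - 1], which is where 'threshold' above comes from.  A
 * hedged sketch of the bucketing, capped at an assumed bin count:
 */
static size_t
example_segment_bin(size_t contiguous_pages, size_t num_bins)
{
	size_t		bin = 0;

	while (contiguous_pages >> bin)		/* leftmost one position, plus one */
		++bin;
	return bin < num_bins ? bin : num_bins - 1;
}

/*
 * example_segment_bin(16, 16) == 5, and the threshold checked above for
 * bin 5 is (size_t) 1 << 4 == 16 pages.
 */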
2124 : :
2125 : : /*
2126 : : * Create a new segment that can handle at least requested_pages. Returns
2127 : : * NULL if the requested total size limit or maximum allowed number of
2128 : : * segments would be exceeded.
2129 : : */
2130 : : static dsa_segment_map *
2642 tmunro@postgresql.or 2131 : 1023 : make_new_segment(dsa_area *area, size_t requested_pages)
2132 : : {
2133 : : dsa_segment_index new_index;
2134 : : size_t metadata_bytes;
2135 : : size_t total_size;
2136 : : size_t total_pages;
2137 : : size_t usable_pages;
2138 : : dsa_segment_map *segment_map;
2139 : : dsm_segment *segment;
2140 : : ResourceOwner oldowner;
2141 : :
3301 rhaas@postgresql.org 2142 [ - + ]: 1023 : Assert(LWLockHeldByMe(DSA_AREA_LOCK(area)));
2143 : :
2144 : : /* Find a segment slot that is not in use (linearly for now). */
2145 [ + - ]: 1063 : for (new_index = 1; new_index < DSA_MAX_SEGMENTS; ++new_index)
2146 : : {
2147 [ + + ]: 1063 : if (area->control->segment_handles[new_index] == DSM_HANDLE_INVALID)
2148 : 1023 : break;
2149 : : }
2150 [ - + ]: 1023 : if (new_index == DSA_MAX_SEGMENTS)
3301 rhaas@postgresql.org 2151 :UBC 0 : return NULL;
2152 : :
2153 : : /*
2154 : : * If the total size limit is already exceeded, then we exit early and
2155 : : * avoid arithmetic wraparound in the unsigned expressions below.
2156 : : */
3301 rhaas@postgresql.org 2157 :CBC 1023 : if (area->control->total_segment_size >=
2158 [ - + ]: 1023 : area->control->max_total_segment_size)
3301 rhaas@postgresql.org 2159 :UBC 0 : return NULL;
2160 : :
2161 : : /*
2162 : : * The size should be at least as big as requested, and at least big
2163 : : * enough to follow a geometric series that approximately doubles the
2164 : : * total storage each time we create a new segment. We use geometric
2165 : : * growth because the underlying DSM system isn't designed for large
2166 : : * numbers of segments (otherwise we might even consider just using one
2167 : : * DSM segment for each large allocation and for each superblock, and then
2168 : : * we wouldn't need to use FreePageManager).
2169 : : *
2170 : : * We decide on a total segment size first, so that we produce tidy
2171 : : * power-of-two sized segments. This is a good property to have if we
2172 : : * move to huge pages in the future. Then we work back to the number of
2173 : : * pages we can fit.
2174 : : */
629 msawada@postgresql.o 2175 :CBC 1023 : total_size = area->control->init_segment_size *
2642 tmunro@postgresql.or 2176 : 1023 : ((size_t) 1 << (new_index / DSA_NUM_SEGMENTS_AT_EACH_SIZE));
629 msawada@postgresql.o 2177 : 1023 : total_size = Min(total_size, area->control->max_segment_size);
3301 rhaas@postgresql.org 2178 : 1023 : total_size = Min(total_size,
2179 : : area->control->max_total_segment_size -
2180 : : area->control->total_segment_size);
2181 : :
2182 : 1023 : total_pages = total_size / FPM_PAGE_SIZE;
2183 : 1023 : metadata_bytes =
2184 : : MAXALIGN(sizeof(dsa_segment_header)) +
2185 : 1023 : MAXALIGN(sizeof(FreePageManager)) +
2186 : : sizeof(dsa_pointer) * total_pages;
2187 : :
2188 : : /* Add padding up to next page boundary. */
2189 [ + - ]: 1023 : if (metadata_bytes % FPM_PAGE_SIZE != 0)
2190 : 1023 : metadata_bytes += FPM_PAGE_SIZE - (metadata_bytes % FPM_PAGE_SIZE);
2191 [ - + ]: 1023 : if (total_size <= metadata_bytes)
3301 rhaas@postgresql.org 2192 :UBC 0 : return NULL;
3301 rhaas@postgresql.org 2193 :CBC 1023 : usable_pages = (total_size - metadata_bytes) / FPM_PAGE_SIZE;
2194 [ - + ]: 1023 : Assert(metadata_bytes + usable_pages * FPM_PAGE_SIZE <= total_size);
2195 : :
2196 : : /* See if that is enough... */
2197 [ - + ]: 1023 : if (requested_pages > usable_pages)
2198 : : {
2199 : : /*
2200 : : * We'll make an odd-sized segment, working forward from the requested
2201 : : * number of pages.
2202 : : */
3301 rhaas@postgresql.org 2203 :UBC 0 : usable_pages = requested_pages;
2204 : 0 : metadata_bytes =
2205 : : MAXALIGN(sizeof(dsa_segment_header)) +
2206 : 0 : MAXALIGN(sizeof(FreePageManager)) +
2207 : : usable_pages * sizeof(dsa_pointer);
2208 : :
2209 : : /* Add padding up to next page boundary. */
2210 [ # # ]: 0 : if (metadata_bytes % FPM_PAGE_SIZE != 0)
2211 : 0 : metadata_bytes += FPM_PAGE_SIZE - (metadata_bytes % FPM_PAGE_SIZE);
2212 : 0 : total_size = metadata_bytes + usable_pages * FPM_PAGE_SIZE;
2213 : :
2214 : : /* Is that too large for dsa_pointer's addressing scheme? */
2215 [ # # ]: 0 : if (total_size > DSA_MAX_SEGMENT_SIZE)
2216 : 0 : return NULL;
2217 : :
2218 : : /* Would that exceed the limit? */
2219 : 0 : if (total_size > area->control->max_total_segment_size -
2220 [ # # ]: 0 : area->control->total_segment_size)
2221 : 0 : return NULL;
2222 : : }
2223 : :
2224 : : /* Create the segment. */
762 heikki.linnakangas@i 2225 :CBC 1023 : oldowner = CurrentResourceOwner;
2226 : 1023 : CurrentResourceOwner = area->resowner;
3301 rhaas@postgresql.org 2227 : 1023 : segment = dsm_create(total_size, 0);
762 heikki.linnakangas@i 2228 : 1023 : CurrentResourceOwner = oldowner;
3301 rhaas@postgresql.org 2229 [ - + ]: 1023 : if (segment == NULL)
3301 rhaas@postgresql.org 2230 :UBC 0 : return NULL;
3301 rhaas@postgresql.org 2231 :CBC 1023 : dsm_pin_segment(segment);
2232 : :
2233 : : /* Store the handle in shared memory to be found by index. */
2234 : 2046 : area->control->segment_handles[new_index] =
2235 : 1023 : dsm_segment_handle(segment);
2236 : : /* Track the highest segment index in the history of the area. */
2237 [ + - ]: 1023 : if (area->control->high_segment_index < new_index)
2238 : 1023 : area->control->high_segment_index = new_index;
2239 : : /* Track the highest segment index this backend has ever mapped. */
2240 [ + - ]: 1023 : if (area->high_segment_index < new_index)
2241 : 1023 : area->high_segment_index = new_index;
2242 : : /* Track total size of all segments. */
2243 : 1023 : area->control->total_segment_size += total_size;
2244 [ - + ]: 1023 : Assert(area->control->total_segment_size <=
2245 : : area->control->max_total_segment_size);
2246 : :
2247 : : /* Build a segment map for this segment in this backend. */
2248 : 1023 : segment_map = &area->segment_maps[new_index];
2249 : 1023 : segment_map->segment = segment;
2250 : 1023 : segment_map->mapped_address = dsm_segment_address(segment);
2251 : 1023 : segment_map->header = (dsa_segment_header *) segment_map->mapped_address;
2252 : 1023 : segment_map->fpm = (FreePageManager *)
2253 : 1023 : (segment_map->mapped_address +
2254 : : MAXALIGN(sizeof(dsa_segment_header)));
2255 : 1023 : segment_map->pagemap = (dsa_pointer *)
2256 : 1023 : (segment_map->mapped_address +
2257 : 1023 : MAXALIGN(sizeof(dsa_segment_header)) +
2258 : : MAXALIGN(sizeof(FreePageManager)));
2259 : :
2260 : : /* Set up the free page map. */
2261 : 1023 : FreePageManagerInitialize(segment_map->fpm, segment_map->mapped_address);
2262 : 1023 : FreePageManagerPut(segment_map->fpm, metadata_bytes / FPM_PAGE_SIZE,
2263 : : usable_pages);
2264 : :
2265 : : /* Set up the segment header and put it in the appropriate bin. */
2266 : 1023 : segment_map->header->magic =
2267 : 1023 : DSA_SEGMENT_HEADER_MAGIC ^ area->control->handle ^ new_index;
2268 : 1023 : segment_map->header->usable_pages = usable_pages;
2269 : 1023 : segment_map->header->size = total_size;
2270 : 1023 : segment_map->header->bin = contiguous_pages_to_segment_bin(usable_pages);
2271 : 1023 : segment_map->header->prev = DSA_SEGMENT_INDEX_NONE;
2272 : 1023 : segment_map->header->next =
2273 : 1023 : area->control->segment_bins[segment_map->header->bin];
2274 : 1023 : segment_map->header->freed = false;
2275 : 1023 : area->control->segment_bins[segment_map->header->bin] = new_index;
2276 [ - + ]: 1023 : if (segment_map->header->next != DSA_SEGMENT_INDEX_NONE)
2277 : : {
2278 : : dsa_segment_map *next =
942 tgl@sss.pgh.pa.us 2279 :UBC 0 : get_segment_by_index(area, segment_map->header->next);
2280 : :
3301 rhaas@postgresql.org 2281 [ # # ]: 0 : Assert(next->header->bin == segment_map->header->bin);
2282 : 0 : next->header->prev = new_index;
2283 : : }
2284 : :
3301 rhaas@postgresql.org 2285 :CBC 1023 : return segment_map;
2286 : : }
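
/*
 * Illustrative aside: the geometric sizing rule above as a hedged
 * standalone sketch, assuming two segments are created at each size (an
 * assumed value for DSA_NUM_SEGMENTS_AT_EACH_SIZE):
 */
static size_t
example_new_segment_size(size_t init_segment_size, size_t max_segment_size,
						 int new_index)
{
	size_t		total_size;

	total_size = init_segment_size * ((size_t) 1 << (new_index / 2));
	return Min(total_size, max_segment_size);
}

/*
 * With a 1MB initial size: index 1 -> 1MB, 2-3 -> 2MB, 4-5 -> 4MB, ... so
 * the cumulative area size roughly doubles each time a size is exhausted.
 */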
2287 : :
2288 : : /*
2289 : : * Check if any segments have been freed by destroy_superblock, so we can
2290 : : * detach from them in this backend. This function is called by
2291 : : * dsa_get_address and dsa_free to make sure that a dsa_pointer they have
2292 : : * received can be resolved to the correct segment.
2293 : : *
2294 : : * The danger we want to defend against is that there could be an old segment
2295 : : * mapped into a given slot in this backend, and the dsa_pointer they have
2296 : : * might refer to some new segment in the same slot. So those functions must
2297 : : * be sure to process all instructions to detach from a freed segment that had
2298 : : * been generated by the time this process received the dsa_pointer, before
2299 : : * they call get_segment_by_index.
2300 : : */
2301 : : static void
2302 : 10167802 : check_for_freed_segments(dsa_area *area)
2303 : : {
2304 : : size_t freed_segment_counter;
2305 : :
2306 : : /*
2307 : : * Any other process that has freed a segment has incremented
2308 : : * freed_segment_counter while holding an LWLock, and that must precede
2309 : : * any backend creating a new segment in the same slot while holding an
2310 : : * LWLock, and that must precede the creation of any dsa_pointer pointing
2311 : : * into the new segment which might reach us here, and the caller must
2312 : : * have sent the dsa_pointer to this process using appropriate memory
2313 : : * synchronization (some kind of locking or atomic primitive or system
2314 : : * call). So all we need to do on the reading side is ask for the load of
2315 : : * freed_segment_counter to follow the caller's load of the dsa_pointer it
2316 : : * has, and we can be sure to detect any segments that had been freed as
2317 : : * of the time that the dsa_pointer reached this process.
2318 : : */
2319 : 10167802 : pg_read_barrier();
2320 : 10167802 : freed_segment_counter = area->control->freed_segment_counter;
2321 [ - + ]: 10167802 : if (unlikely(area->freed_segment_counter != freed_segment_counter))
2322 : : {
2323 : : /* Check all currently mapped segments to find what's been freed. */
3301 rhaas@postgresql.org 2324 :UBC 0 : LWLockAcquire(DSA_AREA_LOCK(area), LW_EXCLUSIVE);
2644 tmunro@postgresql.or 2325 : 0 : check_for_freed_segments_locked(area);
2326 : 0 : LWLockRelease(DSA_AREA_LOCK(area));
2327 : : }
2644 tmunro@postgresql.or 2328 :CBC 10167802 : }
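
/*
 * Illustrative aside: the "appropriate memory synchronization" the comment
 * above demands of callers can be as simple as handing the dsa_pointer over
 * under a lock.  A hypothetical spinlock-protected mailbox (requires
 * storage/spin.h); the type and function names are assumptions:
 */
typedef struct ExampleMailbox
{
	slock_t		mutex;
	dsa_pointer value;
} ExampleMailbox;

static void
example_publish(ExampleMailbox *box, dsa_pointer dp)
{
	SpinLockAcquire(&box->mutex);
	box->value = dp;
	SpinLockRelease(&box->mutex);
}

static dsa_pointer
example_consume(ExampleMailbox *box)
{
	dsa_pointer dp;

	SpinLockAcquire(&box->mutex);
	dp = box->value;
	SpinLockRelease(&box->mutex);
	return dp;					/* safe to resolve with dsa_get_address() */
}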
2329 : :
2330 : : /*
2331 : : * Workhorse for check_for_freed_segments(), and also used directly in paths
2332 : : * where the area lock is already held. This should be called after acquiring
2333 : : * the lock but before looking up any segment by index number, to make sure we
2334 : : * unmap any stale segments that might have previously had the same index as a
2335 : : * current segment.
2336 : : */
2337 : : static void
2338 : 13063 : check_for_freed_segments_locked(dsa_area *area)
2339 : : {
2340 : : size_t freed_segment_counter;
2341 : : int i;
2342 : :
2343 [ - + ]: 13063 : Assert(LWLockHeldByMe(DSA_AREA_LOCK(area)));
2344 : 13063 : freed_segment_counter = area->control->freed_segment_counter;
2345 [ - + ]: 13063 : if (unlikely(area->freed_segment_counter != freed_segment_counter))
2346 : : {
3301 rhaas@postgresql.org 2347 [ # # ]:UBC 0 : for (i = 0; i <= area->high_segment_index; ++i)
2348 : : {
2349 [ # # ]: 0 : if (area->segment_maps[i].header != NULL &&
2350 [ # # ]: 0 : area->segment_maps[i].header->freed)
2351 : : {
2352 : 0 : dsm_detach(area->segment_maps[i].segment);
2353 : 0 : area->segment_maps[i].segment = NULL;
2354 : 0 : area->segment_maps[i].header = NULL;
2355 : 0 : area->segment_maps[i].mapped_address = NULL;
2356 : : }
2357 : : }
2358 : 0 : area->freed_segment_counter = freed_segment_counter;
2359 : : }
3301 rhaas@postgresql.org 2360 :CBC 13063 : }
2361 : :
2362 : : /*
2363 : : * Re-bin segment if it's no longer in the appropriate bin.
2364 : : */
2365 : : static void
896 tmunro@postgresql.or 2366 : 4223 : rebin_segment(dsa_area *area, dsa_segment_map *segment_map)
2367 : : {
2368 : : size_t new_bin;
2369 : : dsa_segment_index segment_index;
2370 : :
2371 : 4223 : new_bin = contiguous_pages_to_segment_bin(fpm_largest(segment_map->fpm));
2372 [ + + ]: 4223 : if (segment_map->header->bin == new_bin)
2373 : 1935 : return;
2374 : :
2375 : : /* Remove it from its current bin. */
2376 : 2288 : unlink_segment(area, segment_map);
2377 : :
2378 : : /* Push it onto the front of its new bin. */
2379 : 2288 : segment_index = get_segment_index(area, segment_map);
2380 : 2288 : segment_map->header->prev = DSA_SEGMENT_INDEX_NONE;
2381 : 2288 : segment_map->header->next = area->control->segment_bins[new_bin];
2382 : 2288 : segment_map->header->bin = new_bin;
2383 : 2288 : area->control->segment_bins[new_bin] = segment_index;
2384 [ + + ]: 2288 : if (segment_map->header->next != DSA_SEGMENT_INDEX_NONE)
2385 : : {
2386 : : dsa_segment_map *next;
2387 : :
2388 : 13 : next = get_segment_by_index(area, segment_map->header->next);
2389 [ - + ]: 13 : Assert(next->header->bin == new_bin);
2390 : 13 : next->header->prev = segment_index;
2391 : : }
2392 : : }