Commit | Line | Data |
---|---|---|
10544ee8 | 1 | /* |
c0c0989a | 2 | * SPDX-License-Identifier: LGPL-2.1-or-later |
10544ee8 | 3 | * |
c0c0989a | 4 | * Copyright 2011 Lai Jiangshan <laijs@cn.fujitsu.com> |
10544ee8 | 5 | * |
c0c0989a | 6 | * mmap/reservation based memory management for Lock-Free RCU Hash Table |
10544ee8 MD |
7 | */ |
8 | ||
9 | #include <unistd.h> | |
10 | #include <stdio.h> | |
11 | #include <errno.h> | |
12 | #include <stdlib.h> | |
13 | #include <sys/mman.h> | |
14 | #include "rculfhash-internal.h" | |
15 | ||
16 | #ifndef MAP_ANONYMOUS | |
17 | #define MAP_ANONYMOUS MAP_ANON | |
18 | #endif | |
19 | ||
20 | /* | |
21 | * The allocation scheme used by the mmap based RCU hash table is to make a | |
22 | * large unaccessible mapping to reserve memory without allocating it. | |
23 | * Then smaller chunks are allocated by overlapping read/write mappings which | |
24 | * do allocate memory. Deallocation is done by an overlapping unaccessible | |
25 | * mapping. | |
26 | * | |
27 | * This scheme was tested on Linux, macOS and Solaris. However, on Cygwin the | |
28 | * mmap wrapper is based on the Windows NtMapViewOfSection API which doesn't | |
29 | * support overlapping mappings. | |
30 | * | |
31 | * An alternative to the overlapping mappings is to use mprotect to change the | |
32 | * protection on chunks of the large mapping, read/write to allocate and none | |
33 | * to deallocate. This works perfectly on Cygwin and Solaris but on Linux a | |
34 | * call to madvise is also required to deallocate and it just doesn't work on | |
35 | * macOS. | |
36 | * | |
37 | * For this reason, we keep the original scheme on all platforms except Cygwin. | |
38 | */ | |
39 | ||
40 | ||
41 | /* Reserve inaccessible memory space without allocating it */ | |
42 | static | |
43 | void *memory_map(size_t length) | |
44 | { | |
45 | void *ret; | |
46 | ||
47 | ret = mmap(NULL, length, PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); | |
48 | if (ret == MAP_FAILED) { | |
49 | perror("mmap"); | |
50 | abort(); | |
51 | } | |
52 | return ret; | |
53 | } | |
54 | ||
55 | static | |
56 | void memory_unmap(void *ptr, size_t length) | |
57 | { | |
58 | if (munmap(ptr, length)) { | |
59 | perror("munmap"); | |
60 | abort(); | |
61 | } | |
62 | } | |
63 | ||
64 | #ifdef __CYGWIN__ | |
65 | /* Set protection to read/write to allocate a memory chunk */ | |
66 | static | |
67 | void memory_populate(void *ptr, size_t length) | |
68 | { | |
69 | if (mprotect(ptr, length, PROT_READ | PROT_WRITE)) { | |
70 | perror("mprotect"); | |
71 | abort(); | |
72 | } | |
73 | } | |
74 | ||
75 | /* Set protection to none to deallocate a memory chunk */ | |
76 | static | |
77 | void memory_discard(void *ptr, size_t length) | |
78 | { | |
79 | if (mprotect(ptr, length, PROT_NONE)) { | |
80 | perror("mprotect"); | |
81 | abort(); | |
82 | } | |
83 | } | |
84 | ||
85 | #else /* __CYGWIN__ */ | |
86 | ||
87 | static | |
88 | void memory_populate(void *ptr, size_t length) | |
89 | { | |
90 | if (mmap(ptr, length, PROT_READ | PROT_WRITE, | |
91 | MAP_FIXED | MAP_PRIVATE | MAP_ANONYMOUS, | |
92 | -1, 0) != ptr) { | |
93 | perror("mmap"); | |
94 | abort(); | |
95 | } | |
96 | } | |
97 | ||
98 | /* | |
99 | * Discard garbage memory so the system does not bother saving it when it | |
100 | * tries to swap it out. Keep the range reserved but inaccessible. | |
101 | */ | |
102 | static | |
103 | void memory_discard(void *ptr, size_t length) | |
104 | { | |
105 | if (mmap(ptr, length, PROT_NONE, | |
106 | MAP_FIXED | MAP_PRIVATE | MAP_ANONYMOUS, | |
107 | -1, 0) != ptr) { | |
108 | perror("mmap"); | |
109 | abort(); | |
110 | } | |
111 | } | |
112 | #endif /* __CYGWIN__ */ | |
113 | ||
114 | static | |
115 | void lttng_ust_lfht_alloc_bucket_table(struct lttng_ust_lfht *ht, unsigned long order) | |
116 | { | |
117 | if (order == 0) { | |
118 | if (ht->min_nr_alloc_buckets == ht->max_nr_buckets) { | |
119 | /* small table */ | |
120 | ht->tbl_mmap = calloc(ht->max_nr_buckets, | |
121 | sizeof(*ht->tbl_mmap)); | |
122 | assert(ht->tbl_mmap); | |
123 | return; | |
124 | } | |
125 | /* large table */ | |
126 | ht->tbl_mmap = memory_map(ht->max_nr_buckets | |
127 | * sizeof(*ht->tbl_mmap)); | |
128 | memory_populate(ht->tbl_mmap, | |
129 | ht->min_nr_alloc_buckets * sizeof(*ht->tbl_mmap)); | |
130 | } else if (order > ht->min_alloc_buckets_order) { | |
131 | /* large table */ | |
132 | unsigned long len = 1UL << (order - 1); | |
133 | ||
134 | assert(ht->min_nr_alloc_buckets < ht->max_nr_buckets); | |
135 | memory_populate(ht->tbl_mmap + len, | |
136 | len * sizeof(*ht->tbl_mmap)); | |
137 | } | |
138 | /* Nothing to do for 0 < order && order <= ht->min_alloc_buckets_order */ | |
139 | } | |
140 | ||
141 | /* | |
142 | * lttng_ust_lfht_free_bucket_table() should be called with decreasing order. | |
143 | * When lttng_ust_lfht_free_bucket_table(0) is called, it means the whole | |
144 | * lfht is destroyed. | |
145 | */ | |
146 | static | |
147 | void lttng_ust_lfht_free_bucket_table(struct lttng_ust_lfht *ht, unsigned long order) | |
148 | { | |
149 | if (order == 0) { | |
150 | if (ht->min_nr_alloc_buckets == ht->max_nr_buckets) { | |
151 | /* small table */ | |
152 | poison_free(ht->tbl_mmap); | |
153 | return; | |
154 | } | |
155 | /* large table */ | |
156 | memory_unmap(ht->tbl_mmap, | |
157 | ht->max_nr_buckets * sizeof(*ht->tbl_mmap)); | |
158 | } else if (order > ht->min_alloc_buckets_order) { | |
159 | /* large table */ | |
160 | unsigned long len = 1UL << (order - 1); | |
161 | ||
162 | assert(ht->min_nr_alloc_buckets < ht->max_nr_buckets); | |
163 | memory_discard(ht->tbl_mmap + len, len * sizeof(*ht->tbl_mmap)); | |
164 | } | |
165 | /* Nothing to do for 0 < order && order <= ht->min_alloc_buckets_order */ | |
166 | } | |
167 | ||
168 | static | |
169 | struct lttng_ust_lfht_node *bucket_at(struct lttng_ust_lfht *ht, unsigned long index) | |
170 | { | |
171 | return &ht->tbl_mmap[index]; | |
172 | } | |
173 | ||
174 | static | |
175 | struct lttng_ust_lfht *alloc_lttng_ust_lfht(unsigned long min_nr_alloc_buckets, | |
176 | unsigned long max_nr_buckets) | |
177 | { | |
178 | unsigned long page_bucket_size; | |
179 | ||
180 | page_bucket_size = getpagesize() / sizeof(struct lttng_ust_lfht_node); | |
181 | if (max_nr_buckets <= page_bucket_size) { | |
182 | /* small table */ | |
183 | min_nr_alloc_buckets = max_nr_buckets; | |
184 | } else { | |
185 | /* large table */ | |
186 | min_nr_alloc_buckets = max(min_nr_alloc_buckets, | |
187 | page_bucket_size); | |
188 | } | |
189 | ||
190 | return __default_alloc_lttng_ust_lfht( | |
191 | &lttng_ust_lfht_mm_mmap, sizeof(struct lttng_ust_lfht), | |
192 | min_nr_alloc_buckets, max_nr_buckets); | |
193 | } | |
194 | ||
195 | const struct lttng_ust_lfht_mm_type lttng_ust_lfht_mm_mmap = { | |
196 | .alloc_lttng_ust_lfht = alloc_lttng_ust_lfht, | |
197 | .alloc_bucket_table = lttng_ust_lfht_alloc_bucket_table, | |
198 | .free_bucket_table = lttng_ust_lfht_free_bucket_table, | |
199 | .bucket_at = bucket_at, | |
200 | }; |