Commit | Line | Data |
---|---|---|
b0b55251 LJ |
1 | /* |
2 | * rculfhash-mm-mmap.c | |
3 | * | |
4 | * mmap/reservation based memory management for Lock-Free RCU Hash Table | |
5 | * | |
6 | * Copyright 2011 - Lai Jiangshan <laijs@cn.fujitsu.com> | |
7 | * | |
8 | * This library is free software; you can redistribute it and/or | |
9 | * modify it under the terms of the GNU Lesser General Public | |
10 | * License as published by the Free Software Foundation; either | |
11 | * version 2.1 of the License, or (at your option) any later version. | |
12 | * | |
13 | * This library is distributed in the hope that it will be useful, | |
14 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
15 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
16 | * Lesser General Public License for more details. | |
17 | * | |
18 | * You should have received a copy of the GNU Lesser General Public | |
19 | * License along with this library; if not, write to the Free Software | |
20 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |
21 | */ | |
22 | ||
23 | #include <unistd.h> | |
24 | #include <sys/mman.h> | |
25 | #include "rculfhash-internal.h" | |
26 | ||
0d0cf93f MD |
27 | #ifndef MAP_ANONYMOUS |
28 | #define MAP_ANONYMOUS MAP_ANON | |
29 | #endif | |
30 | ||
387346f0 MJ |
/*
 * The allocation scheme used by the mmap based RCU hash table is to make a
 * large inaccessible mapping to reserve memory without allocating it.
 * Then smaller chunks are allocated by overlapping read/write mappings which
 * do allocate memory. Deallocation is done by an overlapping inaccessible
 * mapping.
 *
 * This scheme was tested on Linux, macOS and Solaris. However, on Cygwin the
 * mmap wrapper is based on the Windows NtMapViewOfSection API which doesn't
 * support overlapping mappings.
 *
 * An alternative to the overlapping mappings is to use mprotect to change the
 * protection on chunks of the large mapping, read/write to allocate and none
 * to deallocate. This works perfectly on Cygwin and Solaris but on Linux a
 * call to madvise is also required to deallocate and it just doesn't work on
 * macOS.
 *
 * For this reason, we keep the original scheme on all platforms except Cygwin.
 */
50 | ||
51 | ||
/*
 * Reserve an inaccessible (PROT_NONE) address range of @length bytes
 * without committing memory to it.  Aborts (via assert) on failure.
 */
static
void *memory_map(size_t length)
{
	void *addr = mmap(NULL, length, PROT_NONE,
			MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);

	assert(addr != MAP_FAILED);
	return addr;
}
62 | ||
387346f0 MJ |
/* Release a previously reserved/mapped range.  Aborts (via assert) on failure. */
static
void memory_unmap(void *ptr, size_t length)
{
	int err __attribute__((unused)) = munmap(ptr, length);

	assert(!err);
}
72 | ||
387346f0 MJ |
73 | #ifdef __CYGWIN__ |
/*
 * Allocate a chunk of the reservation by switching its protection to
 * read/write (Cygwin cannot overlap mappings, so mprotect is used instead).
 */
static
void memory_populate(void *ptr, size_t length)
{
	int err __attribute__((unused));

	err = mprotect(ptr, length, PROT_READ | PROT_WRITE);
	assert(err == 0);
}
84 | ||
/*
 * Deallocate a chunk by switching its protection back to none; the range
 * stays reserved but inaccessible (Cygwin variant, mprotect based).
 */
static
void memory_discard(void *ptr, size_t length)
{
	int err __attribute__((unused));

	err = mprotect(ptr, length, PROT_NONE);
	assert(err == 0);
}
95 | ||
96 | #else /* __CYGWIN__ */ | |
97 | ||
/*
 * Allocate a chunk of the reservation by overlaying it with a fixed
 * read/write anonymous mapping, which commits the memory in place.
 */
static
void memory_populate(void *ptr, size_t length)
{
	void *addr __attribute__((unused));

	addr = mmap(ptr, length, PROT_READ | PROT_WRITE,
			MAP_FIXED | MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	assert(addr == ptr);
}
108 | ||
/*
 * Deallocate a chunk by overlaying it with a fixed PROT_NONE anonymous
 * mapping: the garbage contents are dropped (so the kernel never has to
 * preserve or swap them out), while the address range remains reserved
 * and inaccessible.
 */
static
void memory_discard(void *ptr, size_t length)
{
	void *addr __attribute__((unused));

	addr = mmap(ptr, length, PROT_NONE,
			MAP_FIXED | MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	assert(addr == ptr);
}
387346f0 | 123 | #endif /* __CYGWIN__ */ |
b0b55251 LJ |
124 | |
125 | static | |
126 | void cds_lfht_alloc_bucket_table(struct cds_lfht *ht, unsigned long order) | |
127 | { | |
128 | if (order == 0) { | |
129 | if (ht->min_nr_alloc_buckets == ht->max_nr_buckets) { | |
130 | /* small table */ | |
131 | ht->tbl_mmap = calloc(ht->max_nr_buckets, | |
132 | sizeof(*ht->tbl_mmap)); | |
133 | assert(ht->tbl_mmap); | |
134 | return; | |
135 | } | |
136 | /* large table */ | |
137 | ht->tbl_mmap = memory_map(ht->max_nr_buckets | |
138 | * sizeof(*ht->tbl_mmap)); | |
139 | memory_populate(ht->tbl_mmap, | |
140 | ht->min_nr_alloc_buckets * sizeof(*ht->tbl_mmap)); | |
141 | } else if (order > ht->min_alloc_buckets_order) { | |
142 | /* large table */ | |
143 | unsigned long len = 1UL << (order - 1); | |
144 | ||
145 | assert(ht->min_nr_alloc_buckets < ht->max_nr_buckets); | |
146 | memory_populate(ht->tbl_mmap + len, | |
147 | len * sizeof(*ht->tbl_mmap)); | |
148 | } | |
149 | /* Nothing to do for 0 < order && order <= ht->min_alloc_buckets_order */ | |
150 | } | |
151 | ||
152 | /* | |
153 | * cds_lfht_free_bucket_table() should be called with decreasing order. | |
154 | * When cds_lfht_free_bucket_table(0) is called, it means the whole | |
155 | * lfht is destroyed. | |
156 | */ | |
157 | static | |
158 | void cds_lfht_free_bucket_table(struct cds_lfht *ht, unsigned long order) | |
159 | { | |
160 | if (order == 0) { | |
161 | if (ht->min_nr_alloc_buckets == ht->max_nr_buckets) { | |
162 | /* small table */ | |
163 | poison_free(ht->tbl_mmap); | |
164 | return; | |
165 | } | |
166 | /* large table */ | |
167 | memory_unmap(ht->tbl_mmap, | |
168 | ht->max_nr_buckets * sizeof(*ht->tbl_mmap)); | |
169 | } else if (order > ht->min_alloc_buckets_order) { | |
170 | /* large table */ | |
171 | unsigned long len = 1UL << (order - 1); | |
172 | ||
173 | assert(ht->min_nr_alloc_buckets < ht->max_nr_buckets); | |
174 | memory_discard(ht->tbl_mmap + len, len * sizeof(*ht->tbl_mmap)); | |
175 | } | |
176 | /* Nothing to do for 0 < order && order <= ht->min_alloc_buckets_order */ | |
177 | } | |
178 | ||
179 | static | |
180 | struct cds_lfht_node *bucket_at(struct cds_lfht *ht, unsigned long index) | |
181 | { | |
182 | return &ht->tbl_mmap[index]; | |
183 | } | |
184 | ||
185 | static | |
186 | struct cds_lfht *alloc_cds_lfht(unsigned long min_nr_alloc_buckets, | |
187 | unsigned long max_nr_buckets) | |
188 | { | |
1228af1c | 189 | unsigned long page_bucket_size; |
b0b55251 | 190 | |
1228af1c | 191 | page_bucket_size = getpagesize() / sizeof(struct cds_lfht_node); |
b0b55251 LJ |
192 | if (max_nr_buckets <= page_bucket_size) { |
193 | /* small table */ | |
194 | min_nr_alloc_buckets = max_nr_buckets; | |
195 | } else { | |
196 | /* large table */ | |
197 | min_nr_alloc_buckets = max(min_nr_alloc_buckets, | |
198 | page_bucket_size); | |
199 | } | |
200 | ||
1228af1c LJ |
201 | return __default_alloc_cds_lfht( |
202 | &cds_lfht_mm_mmap, sizeof(struct cds_lfht), | |
203 | min_nr_alloc_buckets, max_nr_buckets); | |
b0b55251 LJ |
204 | } |
205 | ||
/* mmap/reservation based memory-management operations for rculfhash. */
const struct cds_lfht_mm_type cds_lfht_mm_mmap = {
	.alloc_cds_lfht = alloc_cds_lfht,
	.alloc_bucket_table = cds_lfht_alloc_bucket_table,
	.free_bucket_table = cds_lfht_free_bucket_table,
	.bucket_at = bucket_at,
};