Commit | Line | Data |
---|---|---|
b0b55251 LJ |
1 | /* |
2 | * rculfhash-mm-mmap.c | |
3 | * | |
4 | * mmap/reservation based memory management for Lock-Free RCU Hash Table | |
5 | * | |
6 | * Copyright 2011 - Lai Jiangshan <laijs@cn.fujitsu.com> | |
7 | * | |
8 | * This library is free software; you can redistribute it and/or | |
9 | * modify it under the terms of the GNU Lesser General Public | |
10 | * License as published by the Free Software Foundation; either | |
11 | * version 2.1 of the License, or (at your option) any later version. | |
12 | * | |
13 | * This library is distributed in the hope that it will be useful, | |
14 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
15 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
16 | * Lesser General Public License for more details. | |
17 | * | |
18 | * You should have received a copy of the GNU Lesser General Public | |
19 | * License along with this library; if not, write to the Free Software | |
20 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |
21 | */ | |
22 | ||
23 | #include <unistd.h> | |
6ef90903 MD |
24 | #include <stdio.h> |
25 | #include <errno.h> | |
26 | #include <stdlib.h> | |
b0b55251 | 27 | #include <sys/mman.h> |
01477510 | 28 | #include <urcu/assert.h> |
b0b55251 LJ |
29 | #include "rculfhash-internal.h" |
30 | ||
0d0cf93f MD |
31 | #ifndef MAP_ANONYMOUS |
32 | #define MAP_ANONYMOUS MAP_ANON | |
33 | #endif | |
34 | ||
142af0ff MJ |
35 | /* |
36 | * The allocation scheme used by the mmap based RCU hash table is to make a | |
37 | * large unaccessible mapping to reserve memory without allocating it. | |
38 | * Then smaller chunks are allocated by overlapping read/write mappings which | |
39 | * do allocate memory. Deallocation is done by an overlapping unaccessible | |
40 | * mapping. | |
41 | * | |
42 | * This scheme was tested on Linux, macOS and Solaris. However, on Cygwin the | |
43 | * mmap wrapper is based on the Windows NtMapViewOfSection API which doesn't | |
44 | * support overlapping mappings. | |
45 | * | |
46 | * An alternative to the overlapping mappings is to use mprotect to change the | |
47 | * protection on chunks of the large mapping, read/write to allocate and none | |
48 | * to deallocate. This works perfectly on Cygwin and Solaris but on Linux a | |
49 | * call to madvise is also required to deallocate and it just doesn't work on | |
50 | * macOS. | |
51 | * | |
52 | * For this reason, we keep the original scheme on all platforms except Cygwin. | |
53 | */ | |
54 | ||
55 | ||
/*
 * Reserve a range of address space without committing any memory to it.
 *
 * The range is mapped private, anonymous and with PROT_NONE, so it stays
 * inaccessible until a chunk of it is explicitly made readable/writable.
 *
 * Returns the start address of the reservation. If mmap() fails, the error
 * is reported through perror() and the process is aborted.
 */
static
void *memory_map(size_t length)
{
	void *addr = mmap(NULL, length, PROT_NONE,
			MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);

	if (addr != MAP_FAILED) {
		return addr;
	}
	perror("mmap");
	abort();
}
69 | ||
142af0ff MJ |
/*
 * Release the address range [ptr, ptr + length) with munmap().
 *
 * If munmap() fails, the error is reported through perror() and the
 * process is aborted.
 */
static
void memory_unmap(void *ptr, size_t length)
{
	int err = munmap(ptr, length);

	if (err == 0) {
		return;
	}
	perror("munmap");
	abort();
}
78 | ||
142af0ff MJ |
79 | #ifdef __CYGWIN__ |
/*
 * Allocate a memory chunk by switching the protection of
 * [ptr, ptr + length) to read/write.
 *
 * If mprotect() fails, the error is reported through perror() and the
 * process is aborted.
 */
static
void memory_populate(void *ptr, size_t length)
{
	const int rw_prot = PROT_READ | PROT_WRITE;

	if (mprotect(ptr, length, rw_prot) != 0) {
		perror("mprotect");
		abort();
	}
}
89 | ||
/*
 * Deallocate a memory chunk by removing every access right from
 * [ptr, ptr + length); the range itself stays mapped.
 *
 * If mprotect() fails, the error is reported through perror() and the
 * process is aborted.
 */
static
void memory_discard(void *ptr, size_t length)
{
	int err = mprotect(ptr, length, PROT_NONE);

	if (err != 0) {
		perror("mprotect");
		abort();
	}
}
99 | ||
100 | #else /* __CYGWIN__ */ | |
101 | ||
/*
 * Allocate a memory chunk by placing a private, anonymous read/write
 * mapping at the fixed address range [ptr, ptr + length), overlapping
 * whatever was mapped there before.
 *
 * If mmap() does not return ptr, the error is reported through perror()
 * and the process is aborted.
 */
static
void memory_populate(void *ptr, size_t length)
{
	void *mapped = mmap(ptr, length, PROT_READ | PROT_WRITE,
			MAP_FIXED | MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);

	if (mapped == ptr) {
		return;
	}
	perror("mmap");
	abort();
}
112 | ||
/*
 * Discard the contents of [ptr, ptr + length) so the system does not have
 * to preserve them (for instance when swapping), while keeping the range
 * reserved and inaccessible.
 *
 * This is done by placing a fresh private, anonymous PROT_NONE mapping at
 * the fixed address. If mmap() does not return ptr, the error is reported
 * through perror() and the process is aborted.
 */
static
void memory_discard(void *ptr, size_t length)
{
	void *mapped = mmap(ptr, length, PROT_NONE,
			MAP_FIXED | MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);

	if (mapped == ptr) {
		return;
	}
	perror("mmap");
	abort();
}
142af0ff | 127 | #endif /* __CYGWIN__ */ |
b0b55251 LJ |
128 | |
129 | static | |
130 | void cds_lfht_alloc_bucket_table(struct cds_lfht *ht, unsigned long order) | |
131 | { | |
132 | if (order == 0) { | |
133 | if (ht->min_nr_alloc_buckets == ht->max_nr_buckets) { | |
134 | /* small table */ | |
135 | ht->tbl_mmap = calloc(ht->max_nr_buckets, | |
136 | sizeof(*ht->tbl_mmap)); | |
01477510 | 137 | urcu_posix_assert(ht->tbl_mmap); |
b0b55251 LJ |
138 | return; |
139 | } | |
140 | /* large table */ | |
141 | ht->tbl_mmap = memory_map(ht->max_nr_buckets | |
142 | * sizeof(*ht->tbl_mmap)); | |
143 | memory_populate(ht->tbl_mmap, | |
144 | ht->min_nr_alloc_buckets * sizeof(*ht->tbl_mmap)); | |
145 | } else if (order > ht->min_alloc_buckets_order) { | |
146 | /* large table */ | |
147 | unsigned long len = 1UL << (order - 1); | |
148 | ||
01477510 | 149 | urcu_posix_assert(ht->min_nr_alloc_buckets < ht->max_nr_buckets); |
b0b55251 LJ |
150 | memory_populate(ht->tbl_mmap + len, |
151 | len * sizeof(*ht->tbl_mmap)); | |
152 | } | |
153 | /* Nothing to do for 0 < order && order <= ht->min_alloc_buckets_order */ | |
154 | } | |
155 | ||
156 | /* | |
157 | * cds_lfht_free_bucket_table() should be called with decreasing order. | |
158 | * When cds_lfht_free_bucket_table(0) is called, it means the whole | |
159 | * lfht is destroyed. | |
160 | */ | |
161 | static | |
162 | void cds_lfht_free_bucket_table(struct cds_lfht *ht, unsigned long order) | |
163 | { | |
164 | if (order == 0) { | |
165 | if (ht->min_nr_alloc_buckets == ht->max_nr_buckets) { | |
166 | /* small table */ | |
167 | poison_free(ht->tbl_mmap); | |
168 | return; | |
169 | } | |
170 | /* large table */ | |
171 | memory_unmap(ht->tbl_mmap, | |
172 | ht->max_nr_buckets * sizeof(*ht->tbl_mmap)); | |
173 | } else if (order > ht->min_alloc_buckets_order) { | |
174 | /* large table */ | |
175 | unsigned long len = 1UL << (order - 1); | |
176 | ||
01477510 | 177 | urcu_posix_assert(ht->min_nr_alloc_buckets < ht->max_nr_buckets); |
b0b55251 LJ |
178 | memory_discard(ht->tbl_mmap + len, len * sizeof(*ht->tbl_mmap)); |
179 | } | |
180 | /* Nothing to do for 0 < order && order <= ht->min_alloc_buckets_order */ | |
181 | } | |
182 | ||
183 | static | |
184 | struct cds_lfht_node *bucket_at(struct cds_lfht *ht, unsigned long index) | |
185 | { | |
186 | return &ht->tbl_mmap[index]; | |
187 | } | |
188 | ||
189 | static | |
190 | struct cds_lfht *alloc_cds_lfht(unsigned long min_nr_alloc_buckets, | |
191 | unsigned long max_nr_buckets) | |
192 | { | |
1228af1c | 193 | unsigned long page_bucket_size; |
b0b55251 | 194 | |
1228af1c | 195 | page_bucket_size = getpagesize() / sizeof(struct cds_lfht_node); |
b0b55251 LJ |
196 | if (max_nr_buckets <= page_bucket_size) { |
197 | /* small table */ | |
198 | min_nr_alloc_buckets = max_nr_buckets; | |
199 | } else { | |
200 | /* large table */ | |
201 | min_nr_alloc_buckets = max(min_nr_alloc_buckets, | |
202 | page_bucket_size); | |
203 | } | |
204 | ||
1228af1c LJ |
205 | return __default_alloc_cds_lfht( |
206 | &cds_lfht_mm_mmap, sizeof(struct cds_lfht), | |
207 | min_nr_alloc_buckets, max_nr_buckets); | |
b0b55251 LJ |
208 | } |
209 | ||
210 | const struct cds_lfht_mm_type cds_lfht_mm_mmap = { | |
211 | .alloc_cds_lfht = alloc_cds_lfht, | |
212 | .alloc_bucket_table = cds_lfht_alloc_bucket_table, | |
213 | .free_bucket_table = cds_lfht_free_bucket_table, | |
214 | .bucket_at = bucket_at, | |
215 | }; |