4 * mmap/reservation based memory management for Lock-Free RCU Hash Table
6 * Copyright 2011 - Lai Jiangshan <laijs@cn.fujitsu.com>
8 * This library is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License as published by the Free Software Foundation; either
11 * version 2.1 of the License, or (at your option) any later version.
13 * This library is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 * Lesser General Public License for more details.
18 * You should have received a copy of the GNU Lesser General Public
19 * License along with this library; if not, write to the Free Software
20 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
25 #include "rculfhash-internal.h"
28 #define MAP_ANONYMOUS MAP_ANON
32 * The allocation scheme used by the mmap based RCU hash table is to make a
 * large inaccessible mapping to reserve memory without allocating it.
34 * Then smaller chunks are allocated by overlapping read/write mappings which
 * do allocate memory. Deallocation is done by an overlapping inaccessible
 * mapping.
38 * This scheme was tested on Linux, macOS and Solaris. However, on Cygwin the
39 * mmap wrapper is based on the Windows NtMapViewOfSection API which doesn't
40 * support overlapping mappings.
42 * An alternative to the overlapping mappings is to use mprotect to change the
43 * protection on chunks of the large mapping, read/write to allocate and none
 * to deallocate. This works perfectly on Cygwin and Solaris but on Linux a
 * call to madvise is also required to deallocate and it just doesn't work on
 * macOS.
 * For this reason, we keep the original scheme on all platforms except Cygwin.
/* Reserve inaccessible memory space without allocating it */
static void *memory_map(size_t length)
{
	void *ret = mmap(NULL, length, PROT_NONE,
			MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);

	/* Reservation must succeed: callers do not handle failure. */
	assert(ret != MAP_FAILED);
	return ret;
}
/* Release a memory range previously reserved by memory_map() */
static void memory_unmap(void *ptr, size_t length)
{
	int ret __attribute__((unused));

	ret = munmap(ptr, length);

	assert(!ret);
}
/* Set protection to read/write to allocate a memory chunk */
static void memory_populate(void *ptr, size_t length)
{
	int ret __attribute__((unused));

	ret = mprotect(ptr, length, PROT_READ | PROT_WRITE);

	assert(!ret);
}
/* Set protection to none to deallocate a memory chunk */
static void memory_discard(void *ptr, size_t length)
{
	int ret __attribute__((unused));

	ret = mprotect(ptr, length, PROT_NONE);

	assert(!ret);
}
96 #else /* __CYGWIN__ */
/*
 * Allocate a chunk within the reservation by overlapping it with a
 * read/write anonymous mapping (MAP_FIXED replaces the PROT_NONE pages).
 */
static void memory_populate(void *ptr, size_t length)
{
	void *ret __attribute__((unused));

	ret = mmap(ptr, length, PROT_READ | PROT_WRITE,
		MAP_FIXED | MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);

	/* MAP_FIXED must map exactly at the requested address. */
	assert(ret == ptr);
}
/*
 * Discard the chunk's contents so the system need not preserve them
 * (e.g. when swapping out), while keeping the address range reserved
 * and inaccessible: the fresh anonymous PROT_NONE mapping replaces the
 * populated pages in place.
 */
static void memory_discard(void *ptr, size_t length)
{
	void *ret __attribute__((unused));

	ret = mmap(ptr, length, PROT_NONE,
		MAP_FIXED | MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);

	/* MAP_FIXED must map exactly at the requested address. */
	assert(ret == ptr);
}
123 #endif /* __CYGWIN__ */
126 void cds_lfht_alloc_bucket_table(struct cds_lfht
*ht
, unsigned long order
)
129 if (ht
->min_nr_alloc_buckets
== ht
->max_nr_buckets
) {
131 ht
->tbl_mmap
= calloc(ht
->max_nr_buckets
,
132 sizeof(*ht
->tbl_mmap
));
133 assert(ht
->tbl_mmap
);
137 ht
->tbl_mmap
= memory_map(ht
->max_nr_buckets
138 * sizeof(*ht
->tbl_mmap
));
139 memory_populate(ht
->tbl_mmap
,
140 ht
->min_nr_alloc_buckets
* sizeof(*ht
->tbl_mmap
));
141 } else if (order
> ht
->min_alloc_buckets_order
) {
143 unsigned long len
= 1UL << (order
- 1);
145 assert(ht
->min_nr_alloc_buckets
< ht
->max_nr_buckets
);
146 memory_populate(ht
->tbl_mmap
+ len
,
147 len
* sizeof(*ht
->tbl_mmap
));
149 /* Nothing to do for 0 < order && order <= ht->min_alloc_buckets_order */
153 * cds_lfht_free_bucket_table() should be called with decreasing order.
154 * When cds_lfht_free_bucket_table(0) is called, it means the whole
158 void cds_lfht_free_bucket_table(struct cds_lfht
*ht
, unsigned long order
)
161 if (ht
->min_nr_alloc_buckets
== ht
->max_nr_buckets
) {
163 poison_free(ht
->tbl_mmap
);
167 memory_unmap(ht
->tbl_mmap
,
168 ht
->max_nr_buckets
* sizeof(*ht
->tbl_mmap
));
169 } else if (order
> ht
->min_alloc_buckets_order
) {
171 unsigned long len
= 1UL << (order
- 1);
173 assert(ht
->min_nr_alloc_buckets
< ht
->max_nr_buckets
);
174 memory_discard(ht
->tbl_mmap
+ len
, len
* sizeof(*ht
->tbl_mmap
));
176 /* Nothing to do for 0 < order && order <= ht->min_alloc_buckets_order */
180 struct cds_lfht_node
*bucket_at(struct cds_lfht
*ht
, unsigned long index
)
182 return &ht
->tbl_mmap
[index
];
186 struct cds_lfht
*alloc_cds_lfht(unsigned long min_nr_alloc_buckets
,
187 unsigned long max_nr_buckets
)
189 unsigned long page_bucket_size
;
191 page_bucket_size
= getpagesize() / sizeof(struct cds_lfht_node
);
192 if (max_nr_buckets
<= page_bucket_size
) {
194 min_nr_alloc_buckets
= max_nr_buckets
;
197 min_nr_alloc_buckets
= max(min_nr_alloc_buckets
,
201 return __default_alloc_cds_lfht(
202 &cds_lfht_mm_mmap
, sizeof(struct cds_lfht
),
203 min_nr_alloc_buckets
, max_nr_buckets
);
206 const struct cds_lfht_mm_type cds_lfht_mm_mmap
= {
207 .alloc_cds_lfht
= alloc_cds_lfht
,
208 .alloc_bucket_table
= cds_lfht_alloc_bucket_table
,
209 .free_bucket_table
= cds_lfht_free_bucket_table
,
210 .bucket_at
= bucket_at
,