Commit | Line | Data |
---|---|---|
8a953620 MD |
1 | /* |
2 | * rcutorture.h: simple user-level performance/stress test of RCU. | |
3 | * | |
4 | * Usage: | |
5 | * ./rcu <nreaders> rperf [ <cpustride> ] | |
6 | * Run a read-side performance test with the specified | |
7 | * number of readers spaced by <cpustride>. | |
8 | * Thus "./rcu 16 rperf 2" would run 16 readers on even-numbered | |
9 | * CPUs from 0 to 30. | |
10 | * ./rcu <nupdaters> uperf [ <cpustride> ] | |
11 | * Run an update-side performance test with the specified | |
12 | * number of updaters and specified CPU spacing. | |
13 | * ./rcu <nreaders> perf [ <cpustride> ] | |
14 | * Run a combined read/update performance test with the specified | |
15 | * number of readers and one updater and specified CPU spacing. | |
16 | * The readers run on the low-numbered CPUs and the updater | |
17 | * on the highest-numbered CPU. | |
18 | * | |
19 | * The above tests produce output as follows: | |
20 | * | |
21 | * n_reads: 46008000 n_updates: 146026 nreaders: 2 nupdaters: 1 duration: 1 | |
22 | * ns/read: 43.4707 ns/update: 6848.1 | |
23 | * | |
24 | * The first line lists the total number of RCU reads and updates executed | |
25 | * during the test, the number of reader threads, the number of updater | |
26 | * threads, and the duration of the test in seconds. The second line | |
27 | * lists the average duration of each type of operation in nanoseconds, | |
28 | * or "nan" if the corresponding type of operation was not performed. | |
29 | * | |
30 | * ./rcu <nreaders> stress | |
31 | * Run a stress test with the specified number of readers and | |
32 | * one updater. None of the threads are affinitized to any | |
33 | * particular CPU. | |
34 | * | |
35 | * This test produces output as follows: | |
36 | * | |
37 | * n_reads: 114633217 n_updates: 3903415 n_mberror: 0 | |
38 | * rcu_stress_count: 114618391 14826 0 0 0 0 0 0 0 0 0 | |
39 | * | |
40 | * The first line lists the number of RCU read and update operations | |
41 | * executed, followed by the number of memory-ordering violations | |
42 | * (which will be zero in a correct RCU implementation). The second | |
43 | * line lists the number of readers observing progressively more stale | |
44 | * data. A correct RCU implementation will have all but the first two | |
45 | * numbers zero. | |
46 | * | |
47 | * This program is free software; you can redistribute it and/or modify | |
48 | * it under the terms of the GNU General Public License as published by | |
49 | * the Free Software Foundation; either version 2 of the License, or | |
50 | * (at your option) any later version. | |
51 | * | |
52 | * This program is distributed in the hope that it will be useful, | |
53 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
54 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
55 | * GNU General Public License for more details. | |
56 | * | |
57 | * You should have received a copy of the GNU General Public License | |
58 | * along with this program; if not, write to the Free Software | |
59 | * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. | |
60 | * | |
61 | * Copyright (c) 2008 Paul E. McKenney, IBM Corporation. | |
62 | */ | |
63 | ||
64 | /* | |
65 | * Test variables. | |
66 | */ | |
67 | ||
b57aee66 | 68 | #include <stdlib.h> |
b57aee66 | 69 | |
8a953620 MD |
/* Per-thread operation counters, summed by the control thread at test end. */
DEFINE_PER_THREAD(long long, n_reads_pt);
DEFINE_PER_THREAD(long long, n_updates_pt);

/* Global totals, accumulated from the per-thread counters after the run. */
long long n_reads = 0LL;
long n_updates = 0L;
int nthreadsrunning;	/* started-thread count; updated with uatomic ops */
char argsbuf[64];

/*
 * Test phases: worker threads poll in GOFLAG_INIT until the control
 * thread flips the flag to GOFLAG_RUN, then work until GOFLAG_STOP.
 */
#define GOFLAG_INIT 0
#define GOFLAG_RUN  1
#define GOFLAG_STOP 2

/* Cache-line aligned to avoid false sharing with adjacent globals. */
int goflag __attribute__((__aligned__(CAA_CACHE_LINE_SIZE))) = GOFLAG_INIT;

/* Read-side critical sections executed per iteration of the perf loop. */
#define RCU_READ_RUN 1000

//MD
#define RCU_READ_NESTABLE

/* Nested read-side lock/unlock, compiled out for non-nestable flavors. */
#ifdef RCU_READ_NESTABLE
#define rcu_read_lock_nest() rcu_read_lock()
#define rcu_read_unlock_nest() rcu_read_unlock()
#else /* #ifdef RCU_READ_NESTABLE */
#define rcu_read_lock_nest()
#define rcu_read_unlock_nest()
#endif /* #else #ifdef RCU_READ_NESTABLE */

/*
 * For the QSBR flavor, map the torture-test hooks onto the QSBR API;
 * other flavors fall through to the no-op versions below.
 */
#ifdef TORTURE_QSBR
#define mark_rcu_quiescent_state rcu_quiescent_state
#define put_thread_offline rcu_thread_offline
#define put_thread_online rcu_thread_online
#endif

#ifndef mark_rcu_quiescent_state
#define mark_rcu_quiescent_state() do ; while (0)
#endif /* #ifdef mark_rcu_quiescent_state */

#ifndef put_thread_offline
#define put_thread_offline() do ; while (0)
#define put_thread_online() do ; while (0)
#define put_thread_online_delay() do ; while (0)
#else /* #ifndef put_thread_offline */
#define put_thread_online_delay() synchronize_rcu()
#endif /* #else #ifndef put_thread_offline */
114 | ||
115 | /* | |
116 | * Performance test. | |
117 | */ | |
118 | ||
/*
 * Read-side performance test thread: runs empty RCU read-side critical
 * sections in batches of RCU_READ_RUN until told to stop, then adds its
 * tally to its per-thread counter.  Bound to the CPU encoded in arg.
 */
void *rcu_read_perf_test(void *arg)
{
	struct call_rcu_data *crdp;
	int i;
	int me = (long)arg;	/* CPU to bind this reader to */
	long long n_reads_local = 0;

	rcu_register_thread();
	run_on(me);
	uatomic_inc(&nthreadsrunning);	/* check in with perftestrun() */
	while (goflag == GOFLAG_INIT)
		poll(NULL, 0, 1);	/* wait for the start signal */
	mark_rcu_quiescent_state();
	while (goflag == GOFLAG_RUN) {
		for (i = 0; i < RCU_READ_RUN; i++) {
			rcu_read_lock();
			/* rcu_read_lock_nest(); */
			/* rcu_read_unlock_nest(); */
			rcu_read_unlock();
		}
		n_reads_local += RCU_READ_RUN;
		mark_rcu_quiescent_state();	/* needed by the QSBR flavor */
	}
	__get_thread_var(n_reads_pt) += n_reads_local;
	put_thread_offline();
	/* Release this thread's call_rcu() worker, if it has one. */
	crdp = get_thread_call_rcu_data();
	set_thread_call_rcu_data(NULL);
	call_rcu_data_free(crdp);
	rcu_unregister_thread();

	return (NULL);
}
151 | ||
/*
 * Update-side performance test thread: repeatedly invokes
 * synchronize_rcu() until told to stop, counting completed grace periods
 * in its per-thread counter.
 */
void *rcu_update_perf_test(void *arg)
{
	long long n_updates_local = 0;

	/* With probability 1/16 (bits 8-11 of random() all zero), give
	 * this thread a private call_rcu() worker. */
	if ((random() & 0xf00) == 0) {
		struct call_rcu_data *crdp;

		crdp = create_call_rcu_data(0);
		if (crdp != NULL) {
			fprintf(stderr,
				"Using per-thread call_rcu() worker.\n");
			set_thread_call_rcu_data(crdp);
		}
	}
	uatomic_inc(&nthreadsrunning);	/* check in with perftestrun() */
	while (goflag == GOFLAG_INIT)
		poll(NULL, 0, 1);	/* wait for the start signal */
	while (goflag == GOFLAG_RUN) {
		synchronize_rcu();
		n_updates_local++;
	}
	/* Publish this thread's tally for perftestrun() to sum. */
	__get_thread_var(n_updates_pt) += n_updates_local;
	return NULL;
}
176 | ||
/*
 * Reset all per-thread counters and the running-thread count before a
 * performance-test run.
 */
void perftestinit(void)
{
	init_per_thread(n_reads_pt, 0LL);
	init_per_thread(n_updates_pt, 0LL);
	uatomic_set(&nthreadsrunning, 0);
}
183 | ||
/*
 * Control logic for the performance tests: waits for all nthreads test
 * threads to check in, lets them run for "duration" seconds, stops them,
 * sums the per-thread counters, and prints throughput and per-operation
 * latency.  Does not return: exits the process when done.
 */
void perftestrun(int nthreads, int nreaders, int nupdaters)
{
	int t;
	int duration = 1;	/* measurement interval, in seconds */

	cmm_smp_mb();
	/* Wait until every test thread has incremented nthreadsrunning. */
	while (uatomic_read(&nthreadsrunning) < nthreads)
		poll(NULL, 0, 1);
	/* Memory barriers fence each goflag transition so workers observe
	 * it in order with the surrounding test state. */
	goflag = GOFLAG_RUN;
	cmm_smp_mb();
	sleep(duration);
	cmm_smp_mb();
	goflag = GOFLAG_STOP;
	cmm_smp_mb();
	wait_all_threads();
	for_each_thread(t) {
		n_reads += per_thread(n_reads_pt, t);
		n_updates += per_thread(n_updates_pt, t);
	}
	printf("n_reads: %lld n_updates: %ld nreaders: %d nupdaters: %d duration: %d\n",
	       n_reads, n_updates, nreaders, nupdaters, duration);
	/* ns/op = thread-seconds / ops; prints "nan" when ops == 0. */
	printf("ns/read: %g ns/update: %g\n",
	       ((duration * 1000*1000*1000.*(double)nreaders) /
	        (double)n_reads),
	       ((duration * 1000*1000*1000.*(double)nupdaters) /
	        (double)n_updates));
	/* Tear down any per-CPU call_rcu() worker threads. */
	if (get_cpu_call_rcu_data(0)) {
		fprintf(stderr, "Deallocating per-CPU call_rcu threads.\n");
		free_all_cpu_call_rcu_data();
	}
	exit(0);
}
216 | ||
217 | void perftest(int nreaders, int cpustride) | |
218 | { | |
219 | int i; | |
220 | long arg; | |
221 | ||
222 | perftestinit(); | |
223 | for (i = 0; i < nreaders; i++) { | |
224 | arg = (long)(i * cpustride); | |
225 | create_thread(rcu_read_perf_test, (void *)arg); | |
226 | } | |
227 | arg = (long)(i * cpustride); | |
228 | create_thread(rcu_update_perf_test, (void *)arg); | |
229 | perftestrun(i + 1, nreaders, 1); | |
230 | } | |
231 | ||
232 | void rperftest(int nreaders, int cpustride) | |
233 | { | |
234 | int i; | |
235 | long arg; | |
236 | ||
237 | perftestinit(); | |
238 | init_per_thread(n_reads_pt, 0LL); | |
239 | for (i = 0; i < nreaders; i++) { | |
240 | arg = (long)(i * cpustride); | |
241 | create_thread(rcu_read_perf_test, (void *)arg); | |
242 | } | |
243 | perftestrun(i, nreaders, 0); | |
244 | } | |
245 | ||
246 | void uperftest(int nupdaters, int cpustride) | |
247 | { | |
248 | int i; | |
249 | long arg; | |
250 | ||
251 | perftestinit(); | |
252 | init_per_thread(n_reads_pt, 0LL); | |
253 | for (i = 0; i < nupdaters; i++) { | |
254 | arg = (long)(i * cpustride); | |
255 | create_thread(rcu_update_perf_test, (void *)arg); | |
256 | } | |
257 | perftestrun(i, 0, nupdaters); | |
258 | } | |
259 | ||
260 | /* | |
261 | * Stress test. | |
262 | */ | |
263 | ||
/* Length of the pipeline of rcu_stress elements cycled by the updater. */
#define RCU_STRESS_PIPE_LEN 10

struct rcu_stress {
	int pipe_count;	/* updates since this element was current (staleness) */
	int mbtest;	/* 1 once initialized; a reader seeing 0 => ordering bug */
};

struct rcu_stress rcu_stress_array[RCU_STRESS_PIPE_LEN] = { { 0 } };
struct rcu_stress *rcu_stress_current;	/* element readers should observe */
int rcu_stress_idx = 0;			/* index of rcu_stress_current */

int n_mberror = 0;	/* memory-ordering violations seen by readers */
/* Per-thread histogram: reads observing each staleness level; the extra
 * slot collects out-of-range pipe_count values. */
DEFINE_PER_THREAD(long long [RCU_STRESS_PIPE_LEN + 1], rcu_stress_count);

int garbage = 0;	/* sink for busywork inside the nested read section */
279 | ||
280 | void *rcu_read_stress_test(void *arg) | |
281 | { | |
282 | int i; | |
283 | int itercnt = 0; | |
284 | struct rcu_stress *p; | |
285 | int pc; | |
286 | ||
121a5d44 | 287 | rcu_register_thread(); |
8a953620 MD |
288 | while (goflag == GOFLAG_INIT) |
289 | poll(NULL, 0, 1); | |
290 | mark_rcu_quiescent_state(); | |
291 | while (goflag == GOFLAG_RUN) { | |
292 | rcu_read_lock(); | |
293 | p = rcu_dereference(rcu_stress_current); | |
294 | if (p->mbtest == 0) | |
295 | n_mberror++; | |
296 | rcu_read_lock_nest(); | |
297 | for (i = 0; i < 100; i++) | |
298 | garbage++; | |
299 | rcu_read_unlock_nest(); | |
300 | pc = p->pipe_count; | |
301 | rcu_read_unlock(); | |
302 | if ((pc > RCU_STRESS_PIPE_LEN) || (pc < 0)) | |
303 | pc = RCU_STRESS_PIPE_LEN; | |
304 | __get_thread_var(rcu_stress_count)[pc]++; | |
305 | __get_thread_var(n_reads_pt)++; | |
306 | mark_rcu_quiescent_state(); | |
307 | if ((++itercnt % 0x1000) == 0) { | |
308 | put_thread_offline(); | |
309 | put_thread_online_delay(); | |
310 | put_thread_online(); | |
311 | } | |
312 | } | |
313 | put_thread_offline(); | |
121a5d44 | 314 | rcu_unregister_thread(); |
8a953620 MD |
315 | |
316 | return (NULL); | |
317 | } | |
318 | ||
b57aee66 PM |
/* Serialize the stress-test updater with its call_rcu() callback. */
static pthread_mutex_t call_rcu_test_mutex = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t call_rcu_test_cond = PTHREAD_COND_INITIALIZER;

/*
 * call_rcu() callback for the stress-test updater: signals the condition
 * variable so rcu_update_stress_test() knows the grace period has ended
 * and its stack-allocated rcu_head may be reused.  Taking the mutex
 * first guarantees the updater is already blocked in pthread_cond_wait().
 */
void rcu_update_stress_test_rcu(struct rcu_head *head)
{
	if (pthread_mutex_lock(&call_rcu_test_mutex) != 0) {
		perror("pthread_mutex_lock");
		exit(-1);
	}
	if (pthread_cond_signal(&call_rcu_test_cond) != 0) {
		perror("pthread_cond_signal");
		exit(-1);
	}
	if (pthread_mutex_unlock(&call_rcu_test_mutex) != 0) {
		perror("pthread_mutex_unlock");
		exit(-1);
	}
}
337 | ||
8a953620 MD |
/*
 * Stress-test updater thread: cycles rcu_stress_current through the
 * rcu_stress_array pipeline, aging all non-current elements, and waits
 * for a grace period after each update -- alternating between
 * synchronize_rcu() and call_rcu() (paired with a condition wait) so
 * both update-side primitives get exercised.
 */
void *rcu_update_stress_test(void *arg)
{
	int i;
	struct rcu_stress *p;
	struct rcu_head rh;	/* safe to reuse: completion is awaited below */

	while (goflag == GOFLAG_INIT)
		poll(NULL, 0, 1);	/* wait for the start signal */
	while (goflag == GOFLAG_RUN) {
		i = rcu_stress_idx + 1;
		if (i >= RCU_STRESS_PIPE_LEN)
			i = 0;
		p = &rcu_stress_array[i];
		/* Clear mbtest before reinitializing; the barrier orders the
		 * clear against the reinit so a reader that still sees this
		 * element observes mbtest == 0 only on an ordering bug. */
		p->mbtest = 0;
		cmm_smp_mb();
		p->pipe_count = 0;
		p->mbtest = 1;
		rcu_assign_pointer(rcu_stress_current, p);
		rcu_stress_idx = i;
		/* Age every element except the new current one. */
		for (i = 0; i < RCU_STRESS_PIPE_LEN; i++)
			if (i != rcu_stress_idx)
				rcu_stress_array[i].pipe_count++;
		if (n_updates & 0x1)
			synchronize_rcu();
		else {
			/* Hold the mutex across call_rcu() so the callback's
			 * signal cannot fire before we block in cond_wait. */
			if (pthread_mutex_lock(&call_rcu_test_mutex) != 0) {
				perror("pthread_mutex_lock");
				exit(-1);
			}
			call_rcu(&rh, rcu_update_stress_test_rcu);
			if (pthread_cond_wait(&call_rcu_test_cond,
					      &call_rcu_test_mutex) != 0) {
				perror("pthread_cond_wait");
				exit(-1);
			}
			if (pthread_mutex_unlock(&call_rcu_test_mutex) != 0) {
				perror("pthread_mutex_unlock");
				exit(-1);
			}
		}
		n_updates++;
	}
	return NULL;
}
382 | ||
383 | void *rcu_fake_update_stress_test(void *arg) | |
384 | { | |
b57aee66 PM |
385 | if ((random() & 0xf00) == 0) { |
386 | struct call_rcu_data *crdp; | |
387 | ||
388 | crdp = create_call_rcu_data(0); | |
389 | if (crdp != NULL) { | |
390 | fprintf(stderr, | |
391 | "Using per-thread call_rcu() worker.\n"); | |
392 | set_thread_call_rcu_data(crdp); | |
393 | } | |
394 | } | |
8a953620 MD |
395 | while (goflag == GOFLAG_INIT) |
396 | poll(NULL, 0, 1); | |
397 | while (goflag == GOFLAG_RUN) { | |
398 | synchronize_rcu(); | |
399 | poll(NULL, 0, 1); | |
400 | } | |
b0b31506 | 401 | return NULL; |
8a953620 MD |
402 | } |
403 | ||
/*
 * Control logic for the stress test: zeroes the per-thread histograms,
 * starts nreaders reader threads, one updater, and five fake updaters,
 * runs them for ten seconds, then prints the totals and the staleness
 * histogram.  Does not return: exits the process when done.
 */
void stresstest(int nreaders)
{
	int i;
	int t;
	long long *p;
	long long sum;

	init_per_thread(n_reads_pt, 0LL);
	for_each_thread(t) {
		p = &per_thread(rcu_stress_count,t)[0];
		for (i = 0; i <= RCU_STRESS_PIPE_LEN; i++)
			p[i] = 0LL;
	}
	/* Start with element 0 as the current, fully initialized element. */
	rcu_stress_current = &rcu_stress_array[0];
	rcu_stress_current->pipe_count = 0;
	rcu_stress_current->mbtest = 1;
	for (i = 0; i < nreaders; i++)
		create_thread(rcu_read_stress_test, NULL);
	create_thread(rcu_update_stress_test, NULL);
	for (i = 0; i < 5; i++)
		create_thread(rcu_fake_update_stress_test, NULL);
	/* Memory barriers fence each goflag transition so workers observe
	 * it in order with the surrounding test state. */
	cmm_smp_mb();
	goflag = GOFLAG_RUN;
	cmm_smp_mb();
	sleep(10);
	cmm_smp_mb();
	goflag = GOFLAG_STOP;
	cmm_smp_mb();
	wait_all_threads();
	for_each_thread(t)
		n_reads += per_thread(n_reads_pt, t);
	printf("n_reads: %lld n_updates: %ld n_mberror: %d\n",
	       n_reads, n_updates, n_mberror);
	printf("rcu_stress_count:");
	for (i = 0; i <= RCU_STRESS_PIPE_LEN; i++) {
		sum = 0LL;
		for_each_thread(t) {
			sum += per_thread(rcu_stress_count, t)[i];
		}
		printf(" %lld", sum);
	}
	printf("\n");
	/* Tear down any per-CPU call_rcu() worker threads. */
	if (get_cpu_call_rcu_data(0)) {
		fprintf(stderr, "Deallocating per-CPU call_rcu threads.\n");
		free_all_cpu_call_rcu_data();
	}
	exit(0);
}
452 | ||
453 | /* | |
454 | * Mainprogram. | |
455 | */ | |
456 | ||
/* Print a brief usage message and terminate with a failure status. */
void usage(int argc, char *argv[])
{
	const char *progname = argv[0];

	fprintf(stderr, "Usage: %s [nreaders [ perf | stress ] ]\n", progname);
	exit(-1);
}
462 | ||
/*
 * Parse the command line and dispatch to the selected test.  With no
 * arguments, runs the combined perf test with one reader.  Every test
 * function exits the process itself, so reaching usage() below means
 * argv[2] matched no known test name.
 */
int main(int argc, char *argv[])
{
	int nreaders = 1;
	int cpustride = 1;

	smp_init();
	//rcu_init();
	srandom(time(NULL));
	/* With probability 1/2 (bit 8 of random()), pre-create per-CPU
	 * call_rcu() worker threads to vary the test configuration. */
	if (random() & 0x100) {
		fprintf(stderr, "Allocating per-CPU call_rcu threads.\n");
		if (create_all_cpu_call_rcu_data(0))
			perror("create_all_cpu_call_rcu_data");
	}

#ifdef DEBUG_YIELD
	yield_active |= YIELD_READ;
	yield_active |= YIELD_WRITE;
#endif

	if (argc > 1) {
		nreaders = strtoul(argv[1], NULL, 0);
		if (argc == 2)
			perftest(nreaders, cpustride);	/* does not return */
		if (argc > 3)
			cpustride = strtoul(argv[3], NULL, 0);
		if (strcmp(argv[2], "perf") == 0)
			perftest(nreaders, cpustride);
		else if (strcmp(argv[2], "rperf") == 0)
			rperftest(nreaders, cpustride);
		else if (strcmp(argv[2], "uperf") == 0)
			uperftest(nreaders, cpustride);
		else if (strcmp(argv[2], "stress") == 0)
			stresstest(nreaders);
		usage(argc, argv);	/* unrecognized test name */
	}
	perftest(nreaders, cpustride);
	return 0;
}