2 * mem.spin: Promela code to validate memory barriers with OOO memory
3 * and out-of-order instruction scheduling.
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
19 * Copyright (c) 2009 Mathieu Desnoyers
22 /* Promela validation variables. */
24 /* specific defines "included" here */
25 /* DEFINES file "included" here */
/*
 * get_pid(): identifier of the executing process; expands to _pid.
 * NOTE(review): _pid is presumably Spin's predefined per-process variable —
 * confirm against the Promela reference.
 */
32 #define get_pid() (_pid)
/*
 * get_readerid(): index of the current reader. It is currently the same value
 * as get_pid(), and is used to index the per-reader arrays (reader_barrier[],
 * urcu_active_readers[], ptr_read_first[], ...).
 */
34 #define get_readerid() (get_pid())
37 * Produced process control and data flow. Updated after each instruction to
38 * show which variables are ready. Using one-hot bit encoding per variable to
39 * save state space. Used as triggers to execute the instructions having those
40 * variables as input. Leaving bits active to inhibit instruction execution.
41 * Scheme used to make instruction disabling and automatic dependency fall-back
/*
 * CONSUME_TOKENS(state, bits, notbits): guard expression. It is true when no
 * bit of "notbits" is set in state and every bit of "bits" is set in state.
 * Despite its name, it only tests state and never modifies it; the "notbits"
 * argument is what keeps an already-executed instruction from firing again.
 */
45 #define CONSUME_TOKENS(state, bits, notbits) \
46 ((!(state & (notbits))) && (state & (bits)) == (bits))
/*
 * PRODUCE_TOKENS(state, bits): sets "bits" in state.
 * The expansion already ends with ';', so the usual call form
 * "PRODUCE_TOKENS(...);" expands to a statement followed by an extra ';'.
 */
48 #define PRODUCE_TOKENS(state, bits) \
49 state = state | (bits);
/*
 * CLEAR_TOKENS(state, bits): clears "bits" in state.
 * Unlike PRODUCE_TOKENS, the expansion carries no trailing ';' — the caller
 * must supply the statement separator.
 */
51 #define CLEAR_TOKENS(state, bits) \
52 state = state & ~(bits)
55 * Types of dependency :
59 * - True dependency, Read-after-Write (RAW)
61 * This type of dependency happens when a statement depends on the result of a
62 * previous statement. This applies to any statement which needs to read a
63 * variable written by a preceding statement.
65 * - False dependency, Write-after-Read (WAR)
67 * Typically, variable renaming can ensure that this dependency goes away.
68 * However, if the statements must read and then write from/to the same variable
69 * in the OOO memory model, renaming may be impossible, and therefore this
70 * causes a WAR dependency.
72 * - Output dependency, Write-after-Write (WAW)
74 * Two writes to the same variable in subsequent statements. Variable renaming
75 * can ensure this is not needed, but can be required when writing multiple
76 * times to the same OOO mem model variable.
80 * Execution of a given instruction depends on a previous instruction evaluating
81 * in a way that allows its execution. E.g. : branches.
83 * Useful considerations for joining dependencies after branch
87 * "We say box i dominates box j if every path (leading from input to output
88 * through the diagram) which passes through box j must also pass through box
89 * i. Thus box i dominates box j if box j is subordinate to box i in the
92 * http://www.hipersoft.rice.edu/grads/publications/dom14.pdf
93 * Other classic algorithm to calculate dominance : Lengauer-Tarjan (in gcc)
97 * Just as pre-dominance, but with arcs of the data flow inverted, and input vs
98 * output exchanged. Therefore, i post-dominating j ensures that every path
99 * passing by j will pass by i before reaching the output.
101 * Prefetch and speculative execution
103 * If an instruction depends on the result of a previous branch, but it does not
104 * have side-effects, it can be executed before the branch result is known.
105 * However, it must be restarted if a core-synchronizing instruction is issued.
106 * Note that instructions which depend on the speculative instruction result
107 * but that have side-effects must depend on the branch completion in addition
108 * to the speculatively executed instruction.
110 * Other considerations
112 * Note about "volatile" keyword dependency : The compiler will order volatile
113 * accesses so they appear in the right order on a given CPU. They can be
114 * reordered by the CPU instruction scheduling. This therefore cannot be
115 * considered as a dependency.
119 * Cooper, Keith D.; & Torczon, Linda. (2005). Engineering a Compiler. Morgan
120 * Kaufmann. ISBN 1-55860-698-X.
121 * Kennedy, Ken; & Allen, Randy. (2001). Optimizing Compilers for Modern
122 * Architectures: A Dependence-based Approach. Morgan Kaufmann. ISBN
124 * Muchnick, Steven S. (1997). Advanced Compiler Design and Implementation.
125 * Morgan Kaufmann. ISBN 1-55860-320-4.
129 * Note about loops and nested calls
131 * To keep this model simple, loops expressed in the framework will behave as if
132 * there was a core synchronizing instruction between loops. To see the effect
133 * of loop unrolling, manually unrolling loops is required. Note that if loops
134 * end or start with a core synchronizing instruction, the model is appropriate.
135 * Nested calls are not supported.
139 * Only Alpha has out-of-order cache bank loads. Other architectures (intel,
140 * powerpc, arm) ensure that dependent reads won't be reordered. c.f.
141 * http://www.linuxjournal.com/article/8212)
/*
 * When defined, the RANDOM_CACHE_READ_FROM_MEM section guarded by
 * "#ifdef HAVE_OOO_CACHE_READ" later in this file is compiled in, i.e. cached
 * copies may be randomly refreshed from memory.
 */
144 #define HAVE_OOO_CACHE_READ
148 * Each process has its own data in cache. Caches are randomly updated.
149 * smp_wmb and smp_rmb force cache updates (write and read), smp_mb forces
153 typedef per_proc_byte {
157 typedef per_proc_bit {
161 /* Bitfield has a maximum of 8 procs */
162 typedef per_proc_bitfield {
166 #define DECLARE_CACHED_VAR(type, x) \
169 #define DECLARE_PROC_CACHED_VAR(type, x)\
173 #define INIT_CACHED_VAR(x, v) \
176 #define INIT_PROC_CACHED_VAR(x, v) \
177 cache_dirty_##x = 0; \
180 #define IS_CACHE_DIRTY(x, id) (cache_dirty_##x) /* dirty flag of cached variable x; "id" is accepted but unused */
/* READ_CACHED_VAR(x): value of the cached copy of x (cached_##x), not mem_##x. */
182 #define READ_CACHED_VAR(x) (cached_##x)
184 #define WRITE_CACHED_VAR(x, v) \
187 cache_dirty_##x = 1; \
190 #define CACHE_WRITE_TO_MEM(x, id) \
192 :: IS_CACHE_DIRTY(x, id) -> \
193 mem_##x = cached_##x; \
194 cache_dirty_##x = 0; \
199 #define CACHE_READ_FROM_MEM(x, id) \
201 :: !IS_CACHE_DIRTY(x, id) -> \
202 cached_##x = mem_##x; \
208 * May update other caches if cache is dirty, or not.
210 #define RANDOM_CACHE_WRITE_TO_MEM(x, id)\
212 :: 1 -> CACHE_WRITE_TO_MEM(x, id); \
216 #define RANDOM_CACHE_READ_FROM_MEM(x, id)\
218 :: 1 -> CACHE_READ_FROM_MEM(x, id); \
222 /* Must consume all prior read tokens. All subsequent reads depend on it. */
226 CACHE_READ_FROM_MEM(urcu_gp_ctr, get_pid());
230 CACHE_READ_FROM_MEM(urcu_active_readers[i], get_pid());
232 :: i >= NR_READERS -> break
234 CACHE_READ_FROM_MEM(rcu_ptr, get_pid());
238 CACHE_READ_FROM_MEM(rcu_data[i], get_pid());
240 :: i >= SLAB_SIZE -> break
245 /* Must consume all prior write tokens. All subsequent writes depend on it. */
249 CACHE_WRITE_TO_MEM(urcu_gp_ctr, get_pid());
253 CACHE_WRITE_TO_MEM(urcu_active_readers[i], get_pid());
255 :: i >= NR_READERS -> break
257 CACHE_WRITE_TO_MEM(rcu_ptr, get_pid());
261 CACHE_WRITE_TO_MEM(rcu_data[i], get_pid());
263 :: i >= SLAB_SIZE -> break
268 /* Synchronization point. Must consume all prior read and write tokens. All
269 * subsequent reads and writes depend on it. */
278 #ifdef REMOTE_BARRIERS
280 bit reader_barrier[NR_READERS];
283 * We cannot leave the barriers dependencies in place in REMOTE_BARRIERS mode
284 * because they would add nonexistent core synchronization and would therefore
285 * create an incomplete model.
286 * Therefore, we model the read-side memory barriers by completely disabling the
287 * memory barriers and their dependencies from the read-side. One at a time
288 * (different verification runs), we make a different instruction listen for
/*
 * In REMOTE_BARRIERS mode the reader-side memory barrier expands to nothing:
 * both arguments are ignored and no statement is generated.
 */
292 #define smp_mb_reader(i, j)
295 * Service 0, 1 or many barrier requests.
297 inline smp_mb_recv(i, j)
300 :: (reader_barrier[get_readerid()] == 1) ->
302 * We choose to ignore cycles caused by writer busy-looping,
303 * waiting for the reader, sending barrier requests, and the
304 * reader always services them without continuing execution.
306 progress_ignoring_mb1:
308 reader_barrier[get_readerid()] = 0;
311 * We choose to ignore writer's non-progress caused by the
312 * reader ignoring the writer's mb() requests.
314 progress_ignoring_mb2:
/*
 * PROGRESS_LABEL(progressid): pastes progressid onto the prefix to build the
 * label "progress_writer_progid_<progressid>:" (the ':' is part of the
 * expansion). Used inside smp_mb_send's busy-wait branch.
 * NOTE(review): labels starting with "progress" are presumably treated by Spin
 * as progress states for non-progress-cycle detection — confirm.
 */
319 #define PROGRESS_LABEL(progressid) progress_writer_progid_##progressid:
321 #define smp_mb_send(i, j, progressid) \
326 :: i < NR_READERS -> \
327 reader_barrier[i] = 1; \
329 * Busy-looping waiting for reader barrier handling is of little\
330 * interest, given the reader has the ability to totally ignore \
331 * barrier requests. \
334 :: (reader_barrier[i] == 1) -> \
335 PROGRESS_LABEL(progressid) \
337 :: (reader_barrier[i] == 0) -> break; \
340 :: i >= NR_READERS -> \
348 #define smp_mb_send(i, j, progressid) smp_mb(i) /* plain smp_mb(i); j and progressid are ignored */
/*
 * NOTE(review): these three definitions are a second set for the same names;
 * presumably the branch used when REMOTE_BARRIERS is not defined (the
 * conditional directive is not visible here — confirm).
 */
/* Reader-side barrier is a plain smp_mb(i); j is ignored. */
349 #define smp_mb_reader(i, j) smp_mb(i)
/* Nothing to receive in this configuration: expands to nothing. */
350 #define smp_mb_recv(i, j)
354 /* Keep in sync manually with smp_rmb, smp_wmb, ooo_mem and init() */
355 DECLARE_CACHED_VAR(byte, urcu_gp_ctr);
356 /* Note ! currently only one reader */
357 DECLARE_CACHED_VAR(byte, urcu_active_readers[NR_READERS]);
359 DECLARE_CACHED_VAR(bit, rcu_data[SLAB_SIZE]);
363 DECLARE_CACHED_VAR(bit, rcu_ptr);
364 bit ptr_read_first[NR_READERS];
365 bit ptr_read_second[NR_READERS];
367 DECLARE_CACHED_VAR(byte, rcu_ptr);
368 byte ptr_read_first[NR_READERS];
369 byte ptr_read_second[NR_READERS];
372 bit data_read_first[NR_READERS];
373 bit data_read_second[NR_READERS];
377 inline wait_init_done()
380 :: init_done == 0 -> skip;
388 RANDOM_CACHE_WRITE_TO_MEM(urcu_gp_ctr, get_pid());
392 RANDOM_CACHE_WRITE_TO_MEM(urcu_active_readers[i],
395 :: i >= NR_READERS -> break
397 RANDOM_CACHE_WRITE_TO_MEM(rcu_ptr, get_pid());
401 RANDOM_CACHE_WRITE_TO_MEM(rcu_data[i], get_pid());
403 :: i >= SLAB_SIZE -> break
405 #ifdef HAVE_OOO_CACHE_READ
406 RANDOM_CACHE_READ_FROM_MEM(urcu_gp_ctr, get_pid());
410 RANDOM_CACHE_READ_FROM_MEM(urcu_active_readers[i],
413 :: i >= NR_READERS -> break
415 RANDOM_CACHE_READ_FROM_MEM(rcu_ptr, get_pid());
419 RANDOM_CACHE_READ_FROM_MEM(rcu_data[i], get_pid());
421 :: i >= SLAB_SIZE -> break
425 #endif /* HAVE_OOO_CACHE_READ */
430 * Bit encoding, urcu_reader :
/*
 * _proc_urcu_reader holds the reader's token bits; it is tested and updated
 * through CONSUME_TOKENS / PRODUCE_TOKENS / CLEAR_TOKENS.
 * proc_urcu_reader is a macro alias for it that is #undef'd once the reader
 * proctype has been defined, to avoid name clashes.
 */
433 int _proc_urcu_reader;
434 #define proc_urcu_reader _proc_urcu_reader
436 /* Body of PROCEDURE_READ_LOCK */
/*
 * Tokens internal to PROCEDURE_READ_LOCK. Each one is shifted left by the
 * procedure's "base" argument before being tested or produced, so every
 * instance of the procedure gets its own group of four bits.
 */
/* Set once urcu_active_readers[get_readerid()] has been read into tmp. */
437 #define READ_PROD_A_READ (1 << 0)
/* Branch outcome: !(tmp & RCU_GP_CTR_NEST_MASK) held. */
438 #define READ_PROD_B_IF_TRUE (1 << 1)
/* The other branch outcome (presumably the else case — confirm). */
439 #define READ_PROD_B_IF_FALSE (1 << 2)
/* Set once urcu_gp_ctr has been read into tmp2 (may run as a prefetch). */
440 #define READ_PROD_C_IF_TRUE_READ (1 << 3)
442 #define PROCEDURE_READ_LOCK(base, consumetoken, consumetoken2, producetoken) \
443 :: CONSUME_TOKENS(proc_urcu_reader, (consumetoken | consumetoken2), READ_PROD_A_READ << base) -> \
445 tmp = READ_CACHED_VAR(urcu_active_readers[get_readerid()]); \
446 PRODUCE_TOKENS(proc_urcu_reader, READ_PROD_A_READ << base); \
447 :: CONSUME_TOKENS(proc_urcu_reader, \
448 READ_PROD_A_READ << base, /* RAW, pre-dominant */ \
449 (READ_PROD_B_IF_TRUE | READ_PROD_B_IF_FALSE) << base) -> \
451 :: (!(tmp & RCU_GP_CTR_NEST_MASK)) -> \
452 PRODUCE_TOKENS(proc_urcu_reader, READ_PROD_B_IF_TRUE << base); \
454 PRODUCE_TOKENS(proc_urcu_reader, READ_PROD_B_IF_FALSE << base); \
457 :: CONSUME_TOKENS(proc_urcu_reader, consumetoken, /* prefetch */ \
458 READ_PROD_C_IF_TRUE_READ << base) -> \
460 tmp2 = READ_CACHED_VAR(urcu_gp_ctr); \
461 PRODUCE_TOKENS(proc_urcu_reader, READ_PROD_C_IF_TRUE_READ << base); \
462 :: CONSUME_TOKENS(proc_urcu_reader, \
463 (READ_PROD_B_IF_TRUE \
464 | READ_PROD_C_IF_TRUE_READ /* pre-dominant */ \
465 | READ_PROD_A_READ) << base, /* WAR */ \
468 WRITE_CACHED_VAR(urcu_active_readers[get_readerid()], tmp2); \
469 PRODUCE_TOKENS(proc_urcu_reader, producetoken); \
470 /* IF_MERGE implies \
471 * post-dominance */ \
473 :: CONSUME_TOKENS(proc_urcu_reader, \
474 (READ_PROD_B_IF_FALSE /* pre-dominant */ \
475 | READ_PROD_A_READ) << base, /* WAR */ \
478 WRITE_CACHED_VAR(urcu_active_readers[get_readerid()], \
480 PRODUCE_TOKENS(proc_urcu_reader, producetoken); \
481 /* IF_MERGE implies \
482 * post-dominance */ \
486 /* Body of PROCEDURE_READ_UNLOCK */
/*
 * Token internal to PROCEDURE_READ_UNLOCK, shifted left by the procedure's
 * "base" argument: set once urcu_active_readers[get_readerid()] has been read
 * into tmp, and required before the decremented value is written back.
 */
487 #define READ_PROC_READ_UNLOCK (1 << 0)
489 #define PROCEDURE_READ_UNLOCK(base, consumetoken, producetoken) \
490 :: CONSUME_TOKENS(proc_urcu_reader, \
492 READ_PROC_READ_UNLOCK << base) -> \
494 tmp = READ_CACHED_VAR(urcu_active_readers[get_readerid()]); \
495 PRODUCE_TOKENS(proc_urcu_reader, READ_PROC_READ_UNLOCK << base); \
496 :: CONSUME_TOKENS(proc_urcu_reader, \
498 | (READ_PROC_READ_UNLOCK << base), /* WAR */ \
501 WRITE_CACHED_VAR(urcu_active_readers[get_readerid()], tmp - 1); \
502 PRODUCE_TOKENS(proc_urcu_reader, producetoken); \
506 #define READ_PROD_NONE (1 << 0) /* bit 0: start token, produced first in urcu_one_read */
/*
 * Token bit positions in proc_urcu_reader for one pass of urcu_one_read.
 *
 * The *_BASE values are shift amounts passed as the "base" argument of
 * PROCEDURE_READ_LOCK / PROCEDURE_READ_UNLOCK, which shift their internal
 * READ_PROD_* / READ_PROC_READ_UNLOCK bits by that amount. The matching *_OUT
 * bit is the "producetoken" set when that procedure instance has performed
 * its final write. The remaining bits stand for the memory barriers
 * (READ_PROC_*_MB) and for the pointer / data reads (READ_PROC_READ_GEN*,
 * READ_PROC_ACCESS_GEN*).
 *
 * Bits 0 to 29 are in use, which is what READ_PROC_ALL_TOKENS_CLEAR
 * ((1 << 30) - 1) covers.
 */
508 /* PROCEDURE_READ_LOCK base = << 1 : 1 to 5 */
509 #define READ_LOCK_BASE 1
510 #define READ_LOCK_OUT (1 << 5)
512 #define READ_PROC_FIRST_MB (1 << 6)
514 /* PROCEDURE_READ_LOCK (NESTED) base : << 7 : 7 to 11 */
515 #define READ_LOCK_NESTED_BASE 7
516 #define READ_LOCK_NESTED_OUT (1 << 11)
518 #define READ_PROC_READ_GEN (1 << 12)
519 #define READ_PROC_ACCESS_GEN (1 << 13)
521 /* PROCEDURE_READ_UNLOCK (NESTED) base = << 14 : 14 to 15 */
522 #define READ_UNLOCK_NESTED_BASE 14
523 #define READ_UNLOCK_NESTED_OUT (1 << 15)
525 #define READ_PROC_SECOND_MB (1 << 16)
527 /* PROCEDURE_READ_UNLOCK base = << 17 : 17 to 18 */
528 #define READ_UNLOCK_BASE 17
529 #define READ_UNLOCK_OUT (1 << 18)
531 /* PROCEDURE_READ_LOCK_UNROLL base = << 19 : 19 to 23 */
532 #define READ_LOCK_UNROLL_BASE 19
533 #define READ_LOCK_OUT_UNROLL (1 << 23)
535 #define READ_PROC_THIRD_MB (1 << 24)
537 #define READ_PROC_READ_GEN_UNROLL (1 << 25)
538 #define READ_PROC_ACCESS_GEN_UNROLL (1 << 26)
540 #define READ_PROC_FOURTH_MB (1 << 27)
542 /* PROCEDURE_READ_UNLOCK_UNROLL base = << 28 : 28 to 29 */
543 #define READ_UNLOCK_UNROLL_BASE 28
544 #define READ_UNLOCK_OUT_UNROLL (1 << 29)
547 /* Should not include branches */
548 #define READ_PROC_ALL_TOKENS (READ_PROD_NONE \
550 | READ_PROC_FIRST_MB \
551 | READ_LOCK_NESTED_OUT \
552 | READ_PROC_READ_GEN \
553 | READ_PROC_ACCESS_GEN \
554 | READ_UNLOCK_NESTED_OUT \
555 | READ_PROC_SECOND_MB \
557 | READ_LOCK_OUT_UNROLL \
558 | READ_PROC_THIRD_MB \
559 | READ_PROC_READ_GEN_UNROLL \
560 | READ_PROC_ACCESS_GEN_UNROLL \
561 | READ_PROC_FOURTH_MB \
562 | READ_UNLOCK_OUT_UNROLL)
564 /*
 * Must clear all tokens, including branches: mask of bits 0 to 29, i.e. every
 * reader token plus the per-procedure internal bits (READ_PROD_* and
 * READ_PROC_READ_UNLOCK shifted by their base). Applied with CLEAR_TOKENS at
 * the end of a urcu_one_read pass.
 */
565 #define READ_PROC_ALL_TOKENS_CLEAR ((1 << 30) - 1)
567 inline urcu_one_read(i, j, nest_i, tmp, tmp2)
569 PRODUCE_TOKENS(proc_urcu_reader, READ_PROD_NONE);
572 PRODUCE_TOKENS(proc_urcu_reader, READ_PROC_FIRST_MB);
573 PRODUCE_TOKENS(proc_urcu_reader, READ_PROC_SECOND_MB);
574 PRODUCE_TOKENS(proc_urcu_reader, READ_PROC_THIRD_MB);
575 PRODUCE_TOKENS(proc_urcu_reader, READ_PROC_FOURTH_MB);
578 #ifdef REMOTE_BARRIERS
579 PRODUCE_TOKENS(proc_urcu_reader, READ_PROC_FIRST_MB);
580 PRODUCE_TOKENS(proc_urcu_reader, READ_PROC_SECOND_MB);
581 PRODUCE_TOKENS(proc_urcu_reader, READ_PROC_THIRD_MB);
582 PRODUCE_TOKENS(proc_urcu_reader, READ_PROC_FOURTH_MB);
588 #ifdef REMOTE_BARRIERS
590 * Signal-based memory barrier will only execute when the
591 * execution order appears in program order.
597 :: CONSUME_TOKENS(proc_urcu_reader, READ_PROD_NONE,
598 READ_LOCK_OUT | READ_LOCK_NESTED_OUT
599 | READ_PROC_READ_GEN | READ_PROC_ACCESS_GEN | READ_UNLOCK_NESTED_OUT
601 | READ_LOCK_OUT_UNROLL
602 | READ_PROC_READ_GEN_UNROLL | READ_PROC_ACCESS_GEN_UNROLL | READ_UNLOCK_OUT_UNROLL)
603 || CONSUME_TOKENS(proc_urcu_reader, READ_PROD_NONE | READ_LOCK_OUT,
605 | READ_PROC_READ_GEN | READ_PROC_ACCESS_GEN | READ_UNLOCK_NESTED_OUT
607 | READ_LOCK_OUT_UNROLL
608 | READ_PROC_READ_GEN_UNROLL | READ_PROC_ACCESS_GEN_UNROLL | READ_UNLOCK_OUT_UNROLL)
609 || CONSUME_TOKENS(proc_urcu_reader, READ_PROD_NONE | READ_LOCK_OUT | READ_LOCK_NESTED_OUT,
610 READ_PROC_READ_GEN | READ_PROC_ACCESS_GEN | READ_UNLOCK_NESTED_OUT
612 | READ_LOCK_OUT_UNROLL
613 | READ_PROC_READ_GEN_UNROLL | READ_PROC_ACCESS_GEN_UNROLL | READ_UNLOCK_OUT_UNROLL)
614 || CONSUME_TOKENS(proc_urcu_reader, READ_PROD_NONE | READ_LOCK_OUT
615 | READ_LOCK_NESTED_OUT | READ_PROC_READ_GEN,
616 READ_PROC_ACCESS_GEN | READ_UNLOCK_NESTED_OUT
618 | READ_LOCK_OUT_UNROLL
619 | READ_PROC_READ_GEN_UNROLL | READ_PROC_ACCESS_GEN_UNROLL | READ_UNLOCK_OUT_UNROLL)
620 || CONSUME_TOKENS(proc_urcu_reader, READ_PROD_NONE | READ_LOCK_OUT
621 | READ_LOCK_NESTED_OUT | READ_PROC_READ_GEN | READ_PROC_ACCESS_GEN,
622 READ_UNLOCK_NESTED_OUT
624 | READ_LOCK_OUT_UNROLL
625 | READ_PROC_READ_GEN_UNROLL | READ_PROC_ACCESS_GEN_UNROLL | READ_UNLOCK_OUT_UNROLL)
626 || CONSUME_TOKENS(proc_urcu_reader, READ_PROD_NONE | READ_LOCK_OUT
627 | READ_LOCK_NESTED_OUT | READ_PROC_READ_GEN
628 | READ_PROC_ACCESS_GEN | READ_UNLOCK_NESTED_OUT,
630 | READ_LOCK_OUT_UNROLL
631 | READ_PROC_READ_GEN_UNROLL | READ_PROC_ACCESS_GEN_UNROLL | READ_UNLOCK_OUT_UNROLL)
632 || CONSUME_TOKENS(proc_urcu_reader, READ_PROD_NONE | READ_LOCK_OUT
633 | READ_LOCK_NESTED_OUT | READ_PROC_READ_GEN
634 | READ_PROC_ACCESS_GEN | READ_UNLOCK_NESTED_OUT
637 | READ_PROC_READ_GEN_UNROLL | READ_PROC_ACCESS_GEN_UNROLL | READ_UNLOCK_OUT_UNROLL)
638 || CONSUME_TOKENS(proc_urcu_reader, READ_PROD_NONE | READ_LOCK_OUT
639 | READ_LOCK_NESTED_OUT | READ_PROC_READ_GEN
640 | READ_PROC_ACCESS_GEN | READ_UNLOCK_NESTED_OUT
641 | READ_UNLOCK_OUT | READ_LOCK_OUT_UNROLL,
642 READ_PROC_READ_GEN_UNROLL | READ_PROC_ACCESS_GEN_UNROLL | READ_UNLOCK_OUT_UNROLL)
643 || CONSUME_TOKENS(proc_urcu_reader, READ_PROD_NONE | READ_LOCK_OUT
644 | READ_LOCK_NESTED_OUT | READ_PROC_READ_GEN
645 | READ_PROC_ACCESS_GEN | READ_UNLOCK_NESTED_OUT
646 | READ_UNLOCK_OUT | READ_LOCK_OUT_UNROLL
647 | READ_PROC_READ_GEN_UNROLL,
648 READ_PROC_ACCESS_GEN_UNROLL | READ_UNLOCK_OUT_UNROLL)
649 || CONSUME_TOKENS(proc_urcu_reader, READ_PROD_NONE | READ_LOCK_OUT
650 | READ_LOCK_NESTED_OUT | READ_PROC_READ_GEN
651 | READ_PROC_ACCESS_GEN | READ_UNLOCK_NESTED_OUT
652 | READ_UNLOCK_OUT | READ_LOCK_OUT_UNROLL
653 | READ_PROC_READ_GEN_UNROLL | READ_PROC_ACCESS_GEN_UNROLL,
654 READ_UNLOCK_OUT_UNROLL)
655 || CONSUME_TOKENS(proc_urcu_reader, READ_PROD_NONE | READ_LOCK_OUT
656 | READ_LOCK_NESTED_OUT | READ_PROC_READ_GEN | READ_PROC_ACCESS_GEN | READ_UNLOCK_NESTED_OUT
657 | READ_UNLOCK_OUT | READ_LOCK_OUT_UNROLL
658 | READ_PROC_READ_GEN_UNROLL | READ_PROC_ACCESS_GEN_UNROLL | READ_UNLOCK_OUT_UNROLL,
667 goto non_atomic3_skip;
670 goto non_atomic3_end;
673 #endif /* REMOTE_BARRIERS */
677 PROCEDURE_READ_LOCK(READ_LOCK_BASE, READ_PROD_NONE, 0, READ_LOCK_OUT);
679 :: CONSUME_TOKENS(proc_urcu_reader,
680 READ_LOCK_OUT, /* post-dominant */
681 READ_PROC_FIRST_MB) ->
683 PRODUCE_TOKENS(proc_urcu_reader, READ_PROC_FIRST_MB);
685 PROCEDURE_READ_LOCK(READ_LOCK_NESTED_BASE, READ_PROC_FIRST_MB, READ_LOCK_OUT,
686 READ_LOCK_NESTED_OUT);
688 :: CONSUME_TOKENS(proc_urcu_reader,
689 READ_PROC_FIRST_MB, /* mb() orders reads */
690 READ_PROC_READ_GEN) ->
692 ptr_read_first[get_readerid()] = READ_CACHED_VAR(rcu_ptr);
693 PRODUCE_TOKENS(proc_urcu_reader, READ_PROC_READ_GEN);
695 :: CONSUME_TOKENS(proc_urcu_reader,
696 READ_PROC_FIRST_MB /* mb() orders reads */
697 | READ_PROC_READ_GEN,
698 READ_PROC_ACCESS_GEN) ->
699 /* smp_read_barrier_depends */
702 data_read_first[get_readerid()] =
703 READ_CACHED_VAR(rcu_data[ptr_read_first[get_readerid()]]);
704 PRODUCE_TOKENS(proc_urcu_reader, READ_PROC_ACCESS_GEN);
707 /* Note : we remove the nested memory barrier from the read unlock
708 * model, given it is not usually needed. The implementation has the barrier
709 * because the performance impact added by a branch in the common case does not
713 PROCEDURE_READ_UNLOCK(READ_UNLOCK_NESTED_BASE,
716 | READ_LOCK_NESTED_OUT,
717 READ_UNLOCK_NESTED_OUT);
720 :: CONSUME_TOKENS(proc_urcu_reader,
721 READ_PROC_ACCESS_GEN /* mb() orders reads */
722 | READ_PROC_READ_GEN /* mb() orders reads */
723 | READ_PROC_FIRST_MB /* mb() ordered */
724 | READ_LOCK_OUT /* post-dominant */
725 | READ_LOCK_NESTED_OUT /* post-dominant */
726 | READ_UNLOCK_NESTED_OUT,
727 READ_PROC_SECOND_MB) ->
729 PRODUCE_TOKENS(proc_urcu_reader, READ_PROC_SECOND_MB);
731 PROCEDURE_READ_UNLOCK(READ_UNLOCK_BASE,
732 READ_PROC_SECOND_MB /* mb() orders reads */
733 | READ_PROC_FIRST_MB /* mb() orders reads */
734 | READ_LOCK_NESTED_OUT /* RAW */
735 | READ_LOCK_OUT /* RAW */
736 | READ_UNLOCK_NESTED_OUT, /* RAW */
739 /* Unrolling loop : second consecutive lock */
740 /* reading urcu_active_readers, which have been written by
741 * READ_UNLOCK_OUT : RAW */
742 PROCEDURE_READ_LOCK(READ_LOCK_UNROLL_BASE,
743 READ_PROC_SECOND_MB /* mb() orders reads */
744 | READ_PROC_FIRST_MB, /* mb() orders reads */
745 READ_LOCK_NESTED_OUT /* RAW */
746 | READ_LOCK_OUT /* RAW */
747 | READ_UNLOCK_NESTED_OUT /* RAW */
748 | READ_UNLOCK_OUT, /* RAW */
749 READ_LOCK_OUT_UNROLL);
752 :: CONSUME_TOKENS(proc_urcu_reader,
753 READ_PROC_FIRST_MB /* mb() ordered */
754 | READ_PROC_SECOND_MB /* mb() ordered */
755 | READ_LOCK_OUT_UNROLL /* post-dominant */
756 | READ_LOCK_NESTED_OUT
758 | READ_UNLOCK_NESTED_OUT
760 READ_PROC_THIRD_MB) ->
762 PRODUCE_TOKENS(proc_urcu_reader, READ_PROC_THIRD_MB);
764 :: CONSUME_TOKENS(proc_urcu_reader,
765 READ_PROC_FIRST_MB /* mb() orders reads */
766 | READ_PROC_SECOND_MB /* mb() orders reads */
767 | READ_PROC_THIRD_MB, /* mb() orders reads */
768 READ_PROC_READ_GEN_UNROLL) ->
770 ptr_read_second[get_readerid()] = READ_CACHED_VAR(rcu_ptr);
771 PRODUCE_TOKENS(proc_urcu_reader, READ_PROC_READ_GEN_UNROLL);
773 :: CONSUME_TOKENS(proc_urcu_reader,
774 READ_PROC_READ_GEN_UNROLL
775 | READ_PROC_FIRST_MB /* mb() orders reads */
776 | READ_PROC_SECOND_MB /* mb() orders reads */
777 | READ_PROC_THIRD_MB, /* mb() orders reads */
778 READ_PROC_ACCESS_GEN_UNROLL) ->
779 /* smp_read_barrier_depends */
782 data_read_second[get_readerid()] =
783 READ_CACHED_VAR(rcu_data[ptr_read_second[get_readerid()]]);
784 PRODUCE_TOKENS(proc_urcu_reader, READ_PROC_ACCESS_GEN_UNROLL);
786 :: CONSUME_TOKENS(proc_urcu_reader,
787 READ_PROC_READ_GEN_UNROLL /* mb() orders reads */
788 | READ_PROC_ACCESS_GEN_UNROLL /* mb() orders reads */
789 | READ_PROC_FIRST_MB /* mb() ordered */
790 | READ_PROC_SECOND_MB /* mb() ordered */
791 | READ_PROC_THIRD_MB /* mb() ordered */
792 | READ_LOCK_OUT_UNROLL /* post-dominant */
793 | READ_LOCK_NESTED_OUT
795 | READ_UNLOCK_NESTED_OUT
797 READ_PROC_FOURTH_MB) ->
799 PRODUCE_TOKENS(proc_urcu_reader, READ_PROC_FOURTH_MB);
801 PROCEDURE_READ_UNLOCK(READ_UNLOCK_UNROLL_BASE,
802 READ_PROC_FOURTH_MB /* mb() orders reads */
803 | READ_PROC_THIRD_MB /* mb() orders reads */
804 | READ_LOCK_OUT_UNROLL /* RAW */
805 | READ_PROC_SECOND_MB /* mb() orders reads */
806 | READ_PROC_FIRST_MB /* mb() orders reads */
807 | READ_LOCK_NESTED_OUT /* RAW */
808 | READ_LOCK_OUT /* RAW */
809 | READ_UNLOCK_NESTED_OUT, /* RAW */
810 READ_UNLOCK_OUT_UNROLL);
811 :: CONSUME_TOKENS(proc_urcu_reader, READ_PROC_ALL_TOKENS, 0) ->
812 CLEAR_TOKENS(proc_urcu_reader, READ_PROC_ALL_TOKENS_CLEAR);
818 * Dependency between consecutive loops :
820 * WRITE_CACHED_VAR(urcu_active_readers[get_readerid()], tmp2 - 1)
821 * tmp = READ_CACHED_VAR(urcu_active_readers[get_readerid()]);
823 * _WHEN THE MB()s are in place_, they add full ordering of the
824 * generation pointer read wrt active reader count read, which ensures
825 * execution will not spill across loop execution.
826 * However, in the event mb()s are removed (execution using signal
827 * handler to promote barrier() -> smp_mb()), nothing prevents one loop
828 * to spill its execution on other loop's execution.
851 active proctype urcu_reader()
856 /* Keep in sync manually with smp_rmb, smp_wmb, ooo_mem and init() */
857 DECLARE_PROC_CACHED_VAR(byte, urcu_gp_ctr);
858 /* Note ! currently only one reader */
859 DECLARE_PROC_CACHED_VAR(byte, urcu_active_readers[NR_READERS]);
861 DECLARE_PROC_CACHED_VAR(bit, rcu_data[SLAB_SIZE]);
865 DECLARE_PROC_CACHED_VAR(bit, rcu_ptr);
867 DECLARE_PROC_CACHED_VAR(byte, rcu_ptr);
871 INIT_PROC_CACHED_VAR(urcu_gp_ctr, 1);
872 INIT_PROC_CACHED_VAR(rcu_ptr, 0);
877 INIT_PROC_CACHED_VAR(urcu_active_readers[i], 0);
879 :: i >= NR_READERS -> break
881 INIT_PROC_CACHED_VAR(rcu_data[0], WINE);
885 INIT_PROC_CACHED_VAR(rcu_data[i], POISON);
887 :: i >= SLAB_SIZE -> break
893 assert(get_pid() < NR_PROCS);
899 * We do not test reader's progress here, because we are mainly
900 * interested in writer's progress. The reader never blocks
901 * anyway. We have to test for reader/writer's progress
902 * separately, otherwise we could think the writer is doing
903 * progress when it's blocked by an always progressing reader.
905 #ifdef READER_PROGRESS
908 urcu_one_read(i, j, nest_i, tmp, tmp2);
912 /* no name clash please */
913 #undef proc_urcu_reader
916 /* Model the RCU update process. */
919 * Bit encoding, urcu_writer :
920 * Currently only supports one reader.
/*
 * _proc_urcu_writer holds the writer's token bits; it is tested and updated
 * through CONSUME_TOKENS / PRODUCE_TOKENS / CLEAR_TOKENS.
 * proc_urcu_writer is a macro alias for it that is #undef'd once the writer
 * proctype has been defined, to avoid name clashes.
 */
923 int _proc_urcu_writer;
924 #define proc_urcu_writer _proc_urcu_writer
/*
 * Token bit positions in proc_urcu_writer for one pass of the writer loop
 * (bits 0 to 14, matching WRITE_PROC_ALL_TOKENS_CLEAR).
 *
 *   WRITE_PROD_NONE           start token, produced at the top of each pass
 *   WRITE_DATA                new rcu_data[cur_data] entry written (WINE)
 *   WRITE_PROC_WMB            write barrier between data write and publish
 *   WRITE_XCHG_PTR            rcu_ptr exchanged: old value kept in old_data
 *   WRITE_PROC_FIRST_MB       first full memory barrier
 *   WRITE_PROC_*_READ_GP      urcu_gp_ctr read into tmpa
 *   WRITE_PROC_*_WRITE_GP     urcu_gp_ctr written with tmpa ^ RCU_GP_CTR_BIT
 *   WRITE_PROC_*_WAIT         reader 0 polled; wait is over
 *   WRITE_PROC_*_WAIT_LOOP    reader 0 polled; must poll again (cleared,
 *                             together with *_WAIT, to loop back)
 *   WRITE_PROC_SECOND_MB      second full memory barrier
 *   WRITE_FREE                rcu_data[old_data] poisoned (POISON)
 */
926 #define WRITE_PROD_NONE (1 << 0)
928 #define WRITE_DATA (1 << 1)
929 #define WRITE_PROC_WMB (1 << 2)
930 #define WRITE_XCHG_PTR (1 << 3)
932 #define WRITE_PROC_FIRST_MB (1 << 4)
935 #define WRITE_PROC_FIRST_READ_GP (1 << 5)
936 #define WRITE_PROC_FIRST_WRITE_GP (1 << 6)
937 #define WRITE_PROC_FIRST_WAIT (1 << 7)
938 #define WRITE_PROC_FIRST_WAIT_LOOP (1 << 8)
941 #define WRITE_PROC_SECOND_READ_GP (1 << 9)
942 #define WRITE_PROC_SECOND_WRITE_GP (1 << 10)
943 #define WRITE_PROC_SECOND_WAIT (1 << 11)
944 #define WRITE_PROC_SECOND_WAIT_LOOP (1 << 12)
946 #define WRITE_PROC_SECOND_MB (1 << 13)
948 #define WRITE_FREE (1 << 14)
950 #define WRITE_PROC_ALL_TOKENS (WRITE_PROD_NONE \
954 | WRITE_PROC_FIRST_MB \
955 | WRITE_PROC_FIRST_READ_GP \
956 | WRITE_PROC_FIRST_WRITE_GP \
957 | WRITE_PROC_FIRST_WAIT \
958 | WRITE_PROC_SECOND_READ_GP \
959 | WRITE_PROC_SECOND_WRITE_GP \
960 | WRITE_PROC_SECOND_WAIT \
961 | WRITE_PROC_SECOND_MB \
964 #define WRITE_PROC_ALL_TOKENS_CLEAR ((1 << 15) - 1) /* mask of bits 0 to 14: every writer token up to WRITE_FREE, loop bits included */
967 * Mutexes are implied around writer execution. A single writer at a time.
969 active proctype urcu_writer()
972 byte tmp, tmp2, tmpa;
973 byte cur_data = 0, old_data, loop_nr = 0;
974 byte cur_gp_val = 0; /*
975 * Keep a local trace of the current parity so
976 * we don't add non-existing dependencies on the global
977 * GP update. Needed to test single flip case.
980 /* Keep in sync manually with smp_rmb, smp_wmb, ooo_mem and init() */
981 DECLARE_PROC_CACHED_VAR(byte, urcu_gp_ctr);
982 /* Note ! currently only one reader */
983 DECLARE_PROC_CACHED_VAR(byte, urcu_active_readers[NR_READERS]);
985 DECLARE_PROC_CACHED_VAR(bit, rcu_data[SLAB_SIZE]);
989 DECLARE_PROC_CACHED_VAR(bit, rcu_ptr);
991 DECLARE_PROC_CACHED_VAR(byte, rcu_ptr);
995 INIT_PROC_CACHED_VAR(urcu_gp_ctr, 1);
996 INIT_PROC_CACHED_VAR(rcu_ptr, 0);
1000 :: i < NR_READERS ->
1001 INIT_PROC_CACHED_VAR(urcu_active_readers[i], 0);
1003 :: i >= NR_READERS -> break
1005 INIT_PROC_CACHED_VAR(rcu_data[0], WINE);
1009 INIT_PROC_CACHED_VAR(rcu_data[i], POISON);
1011 :: i >= SLAB_SIZE -> break
1018 assert(get_pid() < NR_PROCS);
1022 #ifdef WRITER_PROGRESS
1025 loop_nr = loop_nr + 1;
1027 PRODUCE_TOKENS(proc_urcu_writer, WRITE_PROD_NONE);
1030 PRODUCE_TOKENS(proc_urcu_writer, WRITE_PROC_WMB);
1034 PRODUCE_TOKENS(proc_urcu_writer, WRITE_PROC_FIRST_MB);
1035 PRODUCE_TOKENS(proc_urcu_writer, WRITE_PROC_SECOND_MB);
1039 PRODUCE_TOKENS(proc_urcu_writer, WRITE_PROC_SECOND_READ_GP);
1040 PRODUCE_TOKENS(proc_urcu_writer, WRITE_PROC_SECOND_WRITE_GP);
1041 PRODUCE_TOKENS(proc_urcu_writer, WRITE_PROC_SECOND_WAIT);
1042 /* For single flip, we need to know the current parity */
1043 cur_gp_val = cur_gp_val ^ RCU_GP_CTR_BIT;
1050 :: CONSUME_TOKENS(proc_urcu_writer,
1054 cur_data = (cur_data + 1) % SLAB_SIZE;
1055 WRITE_CACHED_VAR(rcu_data[cur_data], WINE);
1056 PRODUCE_TOKENS(proc_urcu_writer, WRITE_DATA);
1059 :: CONSUME_TOKENS(proc_urcu_writer,
1063 PRODUCE_TOKENS(proc_urcu_writer, WRITE_PROC_WMB);
1065 :: CONSUME_TOKENS(proc_urcu_writer,
1068 /* rcu_xchg_pointer() */
1070 old_data = READ_CACHED_VAR(rcu_ptr);
1071 WRITE_CACHED_VAR(rcu_ptr, cur_data);
1073 PRODUCE_TOKENS(proc_urcu_writer, WRITE_XCHG_PTR);
1075 :: CONSUME_TOKENS(proc_urcu_writer,
1076 WRITE_DATA | WRITE_PROC_WMB | WRITE_XCHG_PTR,
1077 WRITE_PROC_FIRST_MB) ->
1080 PRODUCE_TOKENS(proc_urcu_writer, WRITE_PROC_FIRST_MB);
1083 :: CONSUME_TOKENS(proc_urcu_writer,
1084 WRITE_PROC_FIRST_MB,
1085 WRITE_PROC_FIRST_READ_GP) ->
1086 tmpa = READ_CACHED_VAR(urcu_gp_ctr);
1087 PRODUCE_TOKENS(proc_urcu_writer, WRITE_PROC_FIRST_READ_GP);
1088 :: CONSUME_TOKENS(proc_urcu_writer,
1089 WRITE_PROC_FIRST_MB | WRITE_PROC_WMB
1090 | WRITE_PROC_FIRST_READ_GP,
1091 WRITE_PROC_FIRST_WRITE_GP) ->
1093 WRITE_CACHED_VAR(urcu_gp_ctr, tmpa ^ RCU_GP_CTR_BIT);
1094 PRODUCE_TOKENS(proc_urcu_writer, WRITE_PROC_FIRST_WRITE_GP);
1096 :: CONSUME_TOKENS(proc_urcu_writer,
1097 //WRITE_PROC_FIRST_WRITE_GP | /* TEST ADDING SYNC CORE */
1098 WRITE_PROC_FIRST_MB, /* can be reordered before/after flips */
1099 WRITE_PROC_FIRST_WAIT | WRITE_PROC_FIRST_WAIT_LOOP) ->
1101 //smp_mb(i); /* TEST */
1102 /* ONLY WAITING FOR READER 0 */
1103 tmp2 = READ_CACHED_VAR(urcu_active_readers[0]);
1105 /* In normal execution, we are always starting by
1106 * waiting for the even parity.
1108 cur_gp_val = RCU_GP_CTR_BIT;
1111 :: (tmp2 & RCU_GP_CTR_NEST_MASK)
1112 && ((tmp2 ^ cur_gp_val) & RCU_GP_CTR_BIT) ->
1113 PRODUCE_TOKENS(proc_urcu_writer, WRITE_PROC_FIRST_WAIT_LOOP);
1115 PRODUCE_TOKENS(proc_urcu_writer, WRITE_PROC_FIRST_WAIT);
1118 :: CONSUME_TOKENS(proc_urcu_writer,
1119 //WRITE_PROC_FIRST_WRITE_GP /* TEST ADDING SYNC CORE */
1120 WRITE_PROC_FIRST_WRITE_GP
1121 | WRITE_PROC_FIRST_READ_GP
1122 | WRITE_PROC_FIRST_WAIT_LOOP
1123 | WRITE_DATA | WRITE_PROC_WMB | WRITE_XCHG_PTR
1124 | WRITE_PROC_FIRST_MB, /* can be reordered before/after flips */
1126 #ifndef GEN_ERROR_WRITER_PROGRESS
1129 /* The memory barrier will invalidate the
1130 * second read done as prefetching. Note that all
1131 * instructions with side-effects depending on
1132 * WRITE_PROC_SECOND_READ_GP should also depend on
1133 * completion of this busy-waiting loop. */
1134 CLEAR_TOKENS(proc_urcu_writer, WRITE_PROC_SECOND_READ_GP);
1138 /* This instruction loops to WRITE_PROC_FIRST_WAIT */
1139 CLEAR_TOKENS(proc_urcu_writer, WRITE_PROC_FIRST_WAIT_LOOP | WRITE_PROC_FIRST_WAIT);
1142 :: CONSUME_TOKENS(proc_urcu_writer,
1143 //WRITE_PROC_FIRST_WAIT | //test /* no dependency. Could pre-fetch, no side-effect. */
1144 WRITE_PROC_FIRST_WRITE_GP
1145 | WRITE_PROC_FIRST_READ_GP
1146 | WRITE_PROC_FIRST_MB,
1147 WRITE_PROC_SECOND_READ_GP) ->
1149 //smp_mb(i); /* TEST */
1150 tmpa = READ_CACHED_VAR(urcu_gp_ctr);
1151 PRODUCE_TOKENS(proc_urcu_writer, WRITE_PROC_SECOND_READ_GP);
1152 :: CONSUME_TOKENS(proc_urcu_writer,
1153 WRITE_PROC_FIRST_WAIT /* dependency on first wait, because this
1154 * instruction has globally observable
1157 | WRITE_PROC_FIRST_MB
1159 | WRITE_PROC_FIRST_READ_GP
1160 | WRITE_PROC_FIRST_WRITE_GP
1161 | WRITE_PROC_SECOND_READ_GP,
1162 WRITE_PROC_SECOND_WRITE_GP) ->
1164 WRITE_CACHED_VAR(urcu_gp_ctr, tmpa ^ RCU_GP_CTR_BIT);
1165 PRODUCE_TOKENS(proc_urcu_writer, WRITE_PROC_SECOND_WRITE_GP);
1167 :: CONSUME_TOKENS(proc_urcu_writer,
1168 //WRITE_PROC_FIRST_WRITE_GP | /* TEST ADDING SYNC CORE */
1169 WRITE_PROC_FIRST_WAIT
1170 | WRITE_PROC_FIRST_MB, /* can be reordered before/after flips */
1171 WRITE_PROC_SECOND_WAIT | WRITE_PROC_SECOND_WAIT_LOOP) ->
1173 //smp_mb(i); /* TEST */
1174 /* ONLY WAITING FOR READER 0 */
1175 tmp2 = READ_CACHED_VAR(urcu_active_readers[0]);
1177 :: (tmp2 & RCU_GP_CTR_NEST_MASK)
1178 && ((tmp2 ^ 0) & RCU_GP_CTR_BIT) ->
1179 PRODUCE_TOKENS(proc_urcu_writer, WRITE_PROC_SECOND_WAIT_LOOP);
1181 PRODUCE_TOKENS(proc_urcu_writer, WRITE_PROC_SECOND_WAIT);
1184 :: CONSUME_TOKENS(proc_urcu_writer,
1185 //WRITE_PROC_FIRST_WRITE_GP | /* TEST ADDING SYNC CORE */
1186 WRITE_PROC_SECOND_WRITE_GP
1187 | WRITE_PROC_FIRST_WRITE_GP
1188 | WRITE_PROC_SECOND_READ_GP
1189 | WRITE_PROC_FIRST_READ_GP
1190 | WRITE_PROC_SECOND_WAIT_LOOP
1191 | WRITE_DATA | WRITE_PROC_WMB | WRITE_XCHG_PTR
1192 | WRITE_PROC_FIRST_MB, /* can be reordered before/after flips */
1194 #ifndef GEN_ERROR_WRITER_PROGRESS
1200 /* This instruction loops to WRITE_PROC_SECOND_WAIT */
1201 CLEAR_TOKENS(proc_urcu_writer, WRITE_PROC_SECOND_WAIT_LOOP | WRITE_PROC_SECOND_WAIT);
1204 :: CONSUME_TOKENS(proc_urcu_writer,
1205 WRITE_PROC_FIRST_WAIT
1206 | WRITE_PROC_SECOND_WAIT
1207 | WRITE_PROC_FIRST_READ_GP
1208 | WRITE_PROC_SECOND_READ_GP
1209 | WRITE_PROC_FIRST_WRITE_GP
1210 | WRITE_PROC_SECOND_WRITE_GP
1211 | WRITE_DATA | WRITE_PROC_WMB | WRITE_XCHG_PTR
1212 | WRITE_PROC_FIRST_MB,
1213 WRITE_PROC_SECOND_MB) ->
1216 PRODUCE_TOKENS(proc_urcu_writer, WRITE_PROC_SECOND_MB);
1218 :: CONSUME_TOKENS(proc_urcu_writer,
1220 | WRITE_PROC_FIRST_WAIT
1221 | WRITE_PROC_SECOND_WAIT
1222 | WRITE_PROC_WMB /* No dependency on
1223 * WRITE_DATA because we
1225 * different location. */
1226 | WRITE_PROC_SECOND_MB
1227 | WRITE_PROC_FIRST_MB,
1229 WRITE_CACHED_VAR(rcu_data[old_data], POISON);
1230 PRODUCE_TOKENS(proc_urcu_writer, WRITE_FREE);
1232 :: CONSUME_TOKENS(proc_urcu_writer, WRITE_PROC_ALL_TOKENS, 0) ->
1233 CLEAR_TOKENS(proc_urcu_writer, WRITE_PROC_ALL_TOKENS_CLEAR);
1239 * Note : Promela model adds implicit serialization of the
1240 * WRITE_FREE instruction. Normally, it would be permitted to
1241 * spill on the next loop execution. Given that the validation we do
1242 * checks for the data entry read to be poisoned, it's ok if
1243 * we do not check "late arriving" memory poisoning.
1248 * Given the reader loops infinitely, let the writer also busy-loop
1249 * with progress here so, with weak fairness, we can test the
1250 * writer's progress.
1255 #ifdef WRITER_PROGRESS
1258 #ifdef READER_PROGRESS
1260 * Make sure we don't block the reader's progress.
1262 smp_mb_send(i, j, 5);
1267 /* Non-atomic parts of the loop */
1270 smp_mb_send(i, j, 1);
1271 goto smp_mb_send1_end;
1272 #ifndef GEN_ERROR_WRITER_PROGRESS
1274 smp_mb_send(i, j, 2);
1275 goto smp_mb_send2_end;
1277 smp_mb_send(i, j, 3);
1278 goto smp_mb_send3_end;
1281 smp_mb_send(i, j, 4);
1282 goto smp_mb_send4_end;
1287 /* no name clash please */
1288 #undef proc_urcu_writer
1291 /* Leave after the readers and writers so the pid count is ok. */
1296 INIT_CACHED_VAR(urcu_gp_ctr, 1);
1297 INIT_CACHED_VAR(rcu_ptr, 0);
1301 :: i < NR_READERS ->
1302 INIT_CACHED_VAR(urcu_active_readers[i], 0);
1303 ptr_read_first[i] = 1;
1304 ptr_read_second[i] = 1;
1305 data_read_first[i] = WINE;
1306 data_read_second[i] = WINE;
1308 :: i >= NR_READERS -> break
1310 INIT_CACHED_VAR(rcu_data[0], WINE);
1314 INIT_CACHED_VAR(rcu_data[i], POISON);
1316 :: i >= SLAB_SIZE -> break