2 * mem.spin: Promela code to validate memory barriers with OOO memory
3 * and out-of-order instruction scheduling.
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
19 * Copyright (c) 2009 Mathieu Desnoyers
22 /* Promela validation variables. */
24 /* specific defines "included" here */
25 /* DEFINES file "included" here */
32 #define get_pid() (_pid)
34 #define get_readerid() (get_pid())
37 * Produced process control and data flow. Updated after each instruction to
38 * show which variables are ready. Using one-hot bit encoding per variable to
39 * save state space. Used as triggers to execute the instructions having those
40 * variables as input. Leaving bits active to inhibit instruction execution.
41 * Scheme used to make instruction disabling and automatic dependency fall-back
45 #define CONSUME_TOKENS(state, bits, notbits) \
46 ((!(state & (notbits))) && (state & (bits)) == (bits))
48 #define PRODUCE_TOKENS(state, bits) \
49 state = state | (bits);
51 #define CLEAR_TOKENS(state, bits) \
52 state = state & ~(bits)
55 * Types of dependency :
59 * - True dependency, Read-after-Write (RAW)
61 * This type of dependency happens when a statement depends on the result of a
62 * previous statement. This applies to any statement which needs to read a
63 * variable written by a preceding statement.
65 * - False dependency, Write-after-Read (WAR)
67 * Typically, variable renaming can ensure that this dependency goes away.
68 * However, if the statements must read and then write from/to the same variable
69 * in the OOO memory model, renaming may be impossible, and therefore this
70 * causes a WAR dependency.
72 * - Output dependency, Write-after-Write (WAW)
74 * Two writes to the same variable in subsequent statements. Variable renaming
75 * can ensure this is not needed, but can be required when writing multiple
76 * times to the same OOO mem model variable.
80 * Execution of a given instruction depends on a previous instruction evaluating
81 * in a way that allows its execution. E.g. : branches.
83 * Useful considerations for joining dependencies after branch
87 * "We say box i dominates box j if every path (leading from input to output
88 * through the diagram) which passes through box j must also pass through box
89 * i. Thus box i dominates box j if box j is subordinate to box i in the
92 * http://www.hipersoft.rice.edu/grads/publications/dom14.pdf
93 * Other classic algorithm to calculate dominance : Lengauer-Tarjan (in gcc)
97 * Just as pre-dominance, but with arcs of the data flow inverted, and input vs
98 * output exchanged. Therefore, i post-dominating j ensures that every path
99 * passing by j will pass by i before reaching the output.
101 * Other considerations
103 * Note about "volatile" keyword dependency : The compiler will order volatile
104 * accesses so they appear in the right order on a given CPU. They can be
105 * reordered by the CPU instruction scheduling. This therefore cannot be
106 * considered as a depencency.
110 * Cooper, Keith D.; & Torczon, Linda. (2005). Engineering a Compiler. Morgan
111 * Kaufmann. ISBN 1-55860-698-X.
112 * Kennedy, Ken; & Allen, Randy. (2001). Optimizing Compilers for Modern
113 * Architectures: A Dependence-based Approach. Morgan Kaufmann. ISBN
115 * Muchnick, Steven S. (1997). Advanced Compiler Design and Implementation.
116 * Morgan Kaufmann. ISBN 1-55860-320-4.
120 * Note about loops and nested calls
122 * To keep this model simple, loops expressed in the framework will behave as if
123 * there was a core synchronizing instruction between loops. To see the effect
124 * of loop unrolling, manually unrolling loops is required. Note that if loops
125 * end or start with a core synchronizing instruction, the model is appropriate.
126 * Nested calls are not supported.
130 * Each process have its own data in cache. Caches are randomly updated.
131 * smp_wmb and smp_rmb forces cache updates (write and read), smp_mb forces
135 typedef per_proc_byte {
139 /* Bitfield has a maximum of 8 procs */
140 typedef per_proc_bit {
144 #define DECLARE_CACHED_VAR(type, x) \
146 per_proc_##type cached_##x; \
147 per_proc_bit cache_dirty_##x;
149 #define INIT_CACHED_VAR(x, v, j) \
151 cache_dirty_##x.bitfield = 0; \
155 cached_##x.val[j] = v; \
157 :: j >= NR_PROCS -> break \
160 #define IS_CACHE_DIRTY(x, id) (cache_dirty_##x.bitfield & (1 << id))
162 #define READ_CACHED_VAR(x) (cached_##x.val[get_pid()])
164 #define WRITE_CACHED_VAR(x, v) \
166 cached_##x.val[get_pid()] = v; \
167 cache_dirty_##x.bitfield = \
168 cache_dirty_##x.bitfield | (1 << get_pid()); \
171 #define CACHE_WRITE_TO_MEM(x, id) \
173 :: IS_CACHE_DIRTY(x, id) -> \
174 mem_##x = cached_##x.val[id]; \
175 cache_dirty_##x.bitfield = \
176 cache_dirty_##x.bitfield & (~(1 << id)); \
181 #define CACHE_READ_FROM_MEM(x, id) \
183 :: !IS_CACHE_DIRTY(x, id) -> \
184 cached_##x.val[id] = mem_##x;\
190 * May update other caches if cache is dirty, or not.
192 #define RANDOM_CACHE_WRITE_TO_MEM(x, id)\
194 :: 1 -> CACHE_WRITE_TO_MEM(x, id); \
198 #define RANDOM_CACHE_READ_FROM_MEM(x, id)\
200 :: 1 -> CACHE_READ_FROM_MEM(x, id); \
204 /* Must consume all prior read tokens. All subsequent reads depend on it. */
208 CACHE_READ_FROM_MEM(urcu_gp_ctr, get_pid());
212 CACHE_READ_FROM_MEM(urcu_active_readers[i], get_pid());
214 :: i >= NR_READERS -> break
216 CACHE_READ_FROM_MEM(rcu_ptr, get_pid());
220 CACHE_READ_FROM_MEM(rcu_data[i], get_pid());
222 :: i >= SLAB_SIZE -> break
227 /* Must consume all prior write tokens. All subsequent writes depend on it. */
231 CACHE_WRITE_TO_MEM(urcu_gp_ctr, get_pid());
235 CACHE_WRITE_TO_MEM(urcu_active_readers[i], get_pid());
237 :: i >= NR_READERS -> break
239 CACHE_WRITE_TO_MEM(rcu_ptr, get_pid());
243 CACHE_WRITE_TO_MEM(rcu_data[i], get_pid());
245 :: i >= SLAB_SIZE -> break
250 /* Synchronization point. Must consume all prior read and write tokens. All
251 * subsequent reads and writes depend on it. */
260 #ifdef REMOTE_BARRIERS
262 bit reader_barrier[NR_READERS];
265 * We cannot leave the barriers dependencies in place in REMOTE_BARRIERS mode
266 * because they would add unexisting core synchronization and would therefore
267 * create an incomplete model.
268 * Therefore, we model the read-side memory barriers by completely disabling the
269 * memory barriers and their dependencies from the read-side. One at a time
270 * (different verification runs), we make a different instruction listen for
274 #define smp_mb_reader(i, j)
277 * Service 0, 1 or many barrier requests.
279 inline smp_mb_recv(i, j)
282 :: (reader_barrier[get_readerid()] == 1) ->
284 reader_barrier[get_readerid()] = 0;
287 * Busy-looping waiting for other barrier requests is not considered as
290 #ifdef READER_PROGRESS
293 #ifdef WRITER_PROGRESS
294 //progress_writer_from_reader1:
298 /* We choose to ignore writer's non-progress caused from the
299 * reader ignoring the writer's mb() requests */
300 #ifdef WRITER_PROGRESS
301 //progress_writer_from_reader2:
307 #ifdef WRITER_PROGRESS
308 #define PROGRESS_LABEL(progressid) progress_writer_progid_##progressid:
310 #define PROGRESS_LABEL(progressid)
313 #define smp_mb_send(i, j, progressid) \
318 :: i < NR_READERS -> \
319 reader_barrier[i] = 1; \
321 * Busy-looping waiting for reader barrier handling is of little\
322 * interest, given the reader has the ability to totally ignore \
323 * barrier requests. \
325 PROGRESS_LABEL(progressid) \
327 :: (reader_barrier[i] == 1) -> skip; \
328 :: (reader_barrier[i] == 0) -> break; \
331 :: i >= NR_READERS -> \
339 #define smp_mb_send(i, j, progressid) smp_mb(i, j)
340 #define smp_mb_reader smp_mb
341 #define smp_mb_recv(i, j)
345 /* Keep in sync manually with smp_rmb, smp_wmb, ooo_mem and init() */
346 DECLARE_CACHED_VAR(byte, urcu_gp_ctr);
347 /* Note ! currently only two readers */
348 DECLARE_CACHED_VAR(byte, urcu_active_readers[NR_READERS]);
350 DECLARE_CACHED_VAR(byte, rcu_ptr);
352 DECLARE_CACHED_VAR(byte, rcu_data[2]);
354 byte ptr_read[NR_READERS];
355 byte data_read[NR_READERS];
359 inline wait_init_done()
362 :: init_done == 0 -> skip;
370 RANDOM_CACHE_WRITE_TO_MEM(urcu_gp_ctr, get_pid());
374 RANDOM_CACHE_WRITE_TO_MEM(urcu_active_readers[i],
377 :: i >= NR_READERS -> break
379 RANDOM_CACHE_WRITE_TO_MEM(rcu_ptr, get_pid());
383 RANDOM_CACHE_WRITE_TO_MEM(rcu_data[i], get_pid());
385 :: i >= SLAB_SIZE -> break
387 RANDOM_CACHE_READ_FROM_MEM(urcu_gp_ctr, get_pid());
391 RANDOM_CACHE_READ_FROM_MEM(urcu_active_readers[i],
394 :: i >= NR_READERS -> break
396 RANDOM_CACHE_READ_FROM_MEM(rcu_ptr, get_pid());
400 RANDOM_CACHE_READ_FROM_MEM(rcu_data[i], get_pid());
402 :: i >= SLAB_SIZE -> break
408 * Bit encoding, urcu_reader :
411 int _proc_urcu_reader;
412 #define proc_urcu_reader _proc_urcu_reader
414 /* Body of PROCEDURE_READ_LOCK */
415 #define READ_PROD_A_READ (1 << 0)
416 #define READ_PROD_B_IF_TRUE (1 << 1)
417 #define READ_PROD_B_IF_FALSE (1 << 2)
418 #define READ_PROD_C_IF_TRUE_READ (1 << 3)
420 #define PROCEDURE_READ_LOCK(base, consumetoken, producetoken) \
421 :: CONSUME_TOKENS(proc_urcu_reader, consumetoken, READ_PROD_A_READ << base) -> \
423 tmp = READ_CACHED_VAR(urcu_active_readers[get_readerid()]); \
424 PRODUCE_TOKENS(proc_urcu_reader, READ_PROD_A_READ << base); \
425 :: CONSUME_TOKENS(proc_urcu_reader, \
426 READ_PROD_A_READ << base, /* RAW, pre-dominant */ \
427 (READ_PROD_B_IF_TRUE | READ_PROD_B_IF_FALSE) << base) -> \
429 :: (!(tmp & RCU_GP_CTR_NEST_MASK)) -> \
430 PRODUCE_TOKENS(proc_urcu_reader, READ_PROD_B_IF_TRUE << base); \
432 PRODUCE_TOKENS(proc_urcu_reader, READ_PROD_B_IF_FALSE << base); \
435 :: CONSUME_TOKENS(proc_urcu_reader, READ_PROD_B_IF_TRUE << base, \
436 READ_PROD_C_IF_TRUE_READ << base) -> \
438 tmp2 = READ_CACHED_VAR(urcu_gp_ctr); \
439 PRODUCE_TOKENS(proc_urcu_reader, READ_PROD_C_IF_TRUE_READ << base); \
440 :: CONSUME_TOKENS(proc_urcu_reader, \
441 (READ_PROD_C_IF_TRUE_READ /* pre-dominant */ \
442 | READ_PROD_A_READ) << base, /* WAR */ \
445 WRITE_CACHED_VAR(urcu_active_readers[get_readerid()], tmp2); \
446 PRODUCE_TOKENS(proc_urcu_reader, producetoken); \
447 /* IF_MERGE implies \
448 * post-dominance */ \
450 :: CONSUME_TOKENS(proc_urcu_reader, \
451 (READ_PROD_B_IF_FALSE /* pre-dominant */ \
452 | READ_PROD_A_READ) << base, /* WAR */ \
455 WRITE_CACHED_VAR(urcu_active_readers[get_readerid()], \
457 PRODUCE_TOKENS(proc_urcu_reader, producetoken); \
458 /* IF_MERGE implies \
459 * post-dominance */ \
463 /* Body of PROCEDURE_READ_LOCK */
464 #define READ_PROC_READ_UNLOCK (1 << 0)
466 #define PROCEDURE_READ_UNLOCK(base, consumetoken, producetoken) \
467 :: CONSUME_TOKENS(proc_urcu_reader, \
469 READ_PROC_READ_UNLOCK << base) -> \
471 tmp2 = READ_CACHED_VAR(urcu_active_readers[get_readerid()]); \
472 PRODUCE_TOKENS(proc_urcu_reader, READ_PROC_READ_UNLOCK << base); \
473 :: CONSUME_TOKENS(proc_urcu_reader, \
475 | (READ_PROC_READ_UNLOCK << base), /* WAR */ \
478 WRITE_CACHED_VAR(urcu_active_readers[get_readerid()], tmp2 - 1); \
479 PRODUCE_TOKENS(proc_urcu_reader, producetoken); \
483 #define READ_PROD_NONE (1 << 0)
485 /* PROCEDURE_READ_LOCK base = << 1 : 1 to 5 */
486 #define READ_LOCK_BASE 1
487 #define READ_LOCK_OUT (1 << 5)
489 #define READ_PROC_FIRST_MB (1 << 6)
491 /* PROCEDURE_READ_LOCK (NESTED) base : << 7 : 7 to 11 */
492 #define READ_LOCK_NESTED_BASE 7
493 #define READ_LOCK_NESTED_OUT (1 << 11)
495 #define READ_PROC_READ_GEN (1 << 12)
496 #define READ_PROC_ACCESS_GEN (1 << 13)
498 /* PROCEDURE_READ_UNLOCK (NESTED) base = << 14 : 14 to 15 */
499 #define READ_UNLOCK_NESTED_BASE 14
500 #define READ_UNLOCK_NESTED_OUT (1 << 15)
502 #define READ_PROC_SECOND_MB (1 << 16)
504 /* PROCEDURE_READ_UNLOCK base = << 17 : 17 to 18 */
505 #define READ_UNLOCK_BASE 17
506 #define READ_UNLOCK_OUT (1 << 18)
508 /* PROCEDURE_READ_LOCK_UNROLL base = << 19 : 19 to 23 */
509 #define READ_LOCK_UNROLL_BASE 19
510 #define READ_LOCK_OUT_UNROLL (1 << 23)
512 #define READ_PROC_THIRD_MB (1 << 24)
514 #define READ_PROC_READ_GEN_UNROLL (1 << 25)
515 #define READ_PROC_ACCESS_GEN_UNROLL (1 << 26)
517 #define READ_PROC_FOURTH_MB (1 << 27)
519 /* PROCEDURE_READ_UNLOCK_UNROLL base = << 28 : 28 to 29 */
520 #define READ_UNLOCK_UNROLL_BASE 28
521 #define READ_UNLOCK_OUT_UNROLL (1 << 29)
524 /* Should not include branches */
525 #define READ_PROC_ALL_TOKENS (READ_PROD_NONE \
527 | READ_PROC_FIRST_MB \
528 | READ_LOCK_NESTED_OUT \
529 | READ_PROC_READ_GEN \
530 | READ_PROC_ACCESS_GEN \
531 | READ_UNLOCK_NESTED_OUT \
532 | READ_PROC_SECOND_MB \
534 | READ_LOCK_OUT_UNROLL \
535 | READ_PROC_THIRD_MB \
536 | READ_PROC_READ_GEN_UNROLL \
537 | READ_PROC_ACCESS_GEN_UNROLL \
538 | READ_PROC_FOURTH_MB \
539 | READ_UNLOCK_OUT_UNROLL)
541 /* Must clear all tokens, including branches */
542 #define READ_PROC_ALL_TOKENS_CLEAR ((1 << 30) - 1)
544 inline urcu_one_read(i, j, nest_i, tmp, tmp2)
546 PRODUCE_TOKENS(proc_urcu_reader, READ_PROD_NONE);
549 PRODUCE_TOKENS(proc_urcu_reader, READ_PROC_FIRST_MB);
550 PRODUCE_TOKENS(proc_urcu_reader, READ_PROC_SECOND_MB);
551 PRODUCE_TOKENS(proc_urcu_reader, READ_PROC_THIRD_MB);
552 PRODUCE_TOKENS(proc_urcu_reader, READ_PROC_FOURTH_MB);
555 #ifdef REMOTE_BARRIERS
556 PRODUCE_TOKENS(proc_urcu_reader, READ_PROC_FIRST_MB);
557 PRODUCE_TOKENS(proc_urcu_reader, READ_PROC_SECOND_MB);
558 PRODUCE_TOKENS(proc_urcu_reader, READ_PROC_THIRD_MB);
559 PRODUCE_TOKENS(proc_urcu_reader, READ_PROC_FOURTH_MB);
565 #ifdef REMOTE_BARRIERS
567 * Signal-based memory barrier will only execute when the
568 * execution order appears in program order.
574 :: CONSUME_TOKENS(proc_urcu_reader, READ_PROD_NONE,
575 READ_LOCK_OUT | READ_LOCK_NESTED_OUT
576 | READ_PROC_READ_GEN | READ_PROC_ACCESS_GEN | READ_UNLOCK_NESTED_OUT
578 | READ_LOCK_OUT_UNROLL
579 | READ_PROC_READ_GEN_UNROLL | READ_PROC_ACCESS_GEN_UNROLL | READ_UNLOCK_OUT_UNROLL)
580 || CONSUME_TOKENS(proc_urcu_reader, READ_PROD_NONE | READ_LOCK_OUT,
582 | READ_PROC_READ_GEN | READ_PROC_ACCESS_GEN | READ_UNLOCK_NESTED_OUT
584 | READ_LOCK_OUT_UNROLL
585 | READ_PROC_READ_GEN_UNROLL | READ_PROC_ACCESS_GEN_UNROLL | READ_UNLOCK_OUT_UNROLL)
586 || CONSUME_TOKENS(proc_urcu_reader, READ_PROD_NONE | READ_LOCK_OUT | READ_LOCK_NESTED_OUT,
587 READ_PROC_READ_GEN | READ_PROC_ACCESS_GEN | READ_UNLOCK_NESTED_OUT
589 | READ_LOCK_OUT_UNROLL
590 | READ_PROC_READ_GEN_UNROLL | READ_PROC_ACCESS_GEN_UNROLL | READ_UNLOCK_OUT_UNROLL)
591 || CONSUME_TOKENS(proc_urcu_reader, READ_PROD_NONE | READ_LOCK_OUT
592 | READ_LOCK_NESTED_OUT | READ_PROC_READ_GEN,
593 READ_PROC_ACCESS_GEN | READ_UNLOCK_NESTED_OUT
595 | READ_LOCK_OUT_UNROLL
596 | READ_PROC_READ_GEN_UNROLL | READ_PROC_ACCESS_GEN_UNROLL | READ_UNLOCK_OUT_UNROLL)
597 || CONSUME_TOKENS(proc_urcu_reader, READ_PROD_NONE | READ_LOCK_OUT
598 | READ_LOCK_NESTED_OUT | READ_PROC_READ_GEN | READ_PROC_ACCESS_GEN,
599 READ_UNLOCK_NESTED_OUT
601 | READ_LOCK_OUT_UNROLL
602 | READ_PROC_READ_GEN_UNROLL | READ_PROC_ACCESS_GEN_UNROLL | READ_UNLOCK_OUT_UNROLL)
603 || CONSUME_TOKENS(proc_urcu_reader, READ_PROD_NONE | READ_LOCK_OUT
604 | READ_LOCK_NESTED_OUT | READ_PROC_READ_GEN
605 | READ_PROC_ACCESS_GEN | READ_UNLOCK_NESTED_OUT,
607 | READ_LOCK_OUT_UNROLL
608 | READ_PROC_READ_GEN_UNROLL | READ_PROC_ACCESS_GEN_UNROLL | READ_UNLOCK_OUT_UNROLL)
609 || CONSUME_TOKENS(proc_urcu_reader, READ_PROD_NONE | READ_LOCK_OUT
610 | READ_LOCK_NESTED_OUT | READ_PROC_READ_GEN
611 | READ_PROC_ACCESS_GEN | READ_UNLOCK_NESTED_OUT
614 | READ_PROC_READ_GEN_UNROLL | READ_PROC_ACCESS_GEN_UNROLL | READ_UNLOCK_OUT_UNROLL)
615 || CONSUME_TOKENS(proc_urcu_reader, READ_PROD_NONE | READ_LOCK_OUT
616 | READ_LOCK_NESTED_OUT | READ_PROC_READ_GEN
617 | READ_PROC_ACCESS_GEN | READ_UNLOCK_NESTED_OUT
618 | READ_UNLOCK_OUT | READ_LOCK_OUT_UNROLL,
619 READ_PROC_READ_GEN_UNROLL | READ_PROC_ACCESS_GEN_UNROLL | READ_UNLOCK_OUT_UNROLL)
620 || CONSUME_TOKENS(proc_urcu_reader, READ_PROD_NONE | READ_LOCK_OUT
621 | READ_LOCK_NESTED_OUT | READ_PROC_READ_GEN
622 | READ_PROC_ACCESS_GEN | READ_UNLOCK_NESTED_OUT
623 | READ_UNLOCK_OUT | READ_LOCK_OUT_UNROLL
624 | READ_PROC_READ_GEN_UNROLL,
625 READ_PROC_ACCESS_GEN_UNROLL | READ_UNLOCK_OUT_UNROLL)
626 || CONSUME_TOKENS(proc_urcu_reader, READ_PROD_NONE | READ_LOCK_OUT
627 | READ_LOCK_NESTED_OUT | READ_PROC_READ_GEN
628 | READ_PROC_ACCESS_GEN | READ_UNLOCK_NESTED_OUT
629 | READ_UNLOCK_OUT | READ_LOCK_OUT_UNROLL
630 | READ_PROC_READ_GEN_UNROLL | READ_PROC_ACCESS_GEN_UNROLL,
631 READ_UNLOCK_OUT_UNROLL)
632 || CONSUME_TOKENS(proc_urcu_reader, READ_PROD_NONE | READ_LOCK_OUT
633 | READ_LOCK_NESTED_OUT | READ_PROC_READ_GEN | READ_PROC_ACCESS_GEN | READ_UNLOCK_NESTED_OUT
634 | READ_UNLOCK_OUT | READ_LOCK_OUT_UNROLL
635 | READ_PROC_READ_GEN_UNROLL | READ_PROC_ACCESS_GEN_UNROLL | READ_UNLOCK_OUT_UNROLL,
645 goto non_atomic3_skip;
648 goto non_atomic3_end;
651 #endif /* REMOTE_BARRIERS */
655 PROCEDURE_READ_LOCK(READ_LOCK_BASE, READ_PROD_NONE, READ_LOCK_OUT);
657 :: CONSUME_TOKENS(proc_urcu_reader,
658 READ_LOCK_OUT, /* post-dominant */
659 READ_PROC_FIRST_MB) ->
661 PRODUCE_TOKENS(proc_urcu_reader, READ_PROC_FIRST_MB);
663 PROCEDURE_READ_LOCK(READ_LOCK_NESTED_BASE, READ_PROC_FIRST_MB | READ_LOCK_OUT,
664 READ_LOCK_NESTED_OUT);
666 :: CONSUME_TOKENS(proc_urcu_reader,
667 READ_PROC_FIRST_MB, /* mb() orders reads */
668 READ_PROC_READ_GEN) ->
670 ptr_read[get_readerid()] =
671 READ_CACHED_VAR(rcu_ptr);
672 PRODUCE_TOKENS(proc_urcu_reader, READ_PROC_READ_GEN);
674 :: CONSUME_TOKENS(proc_urcu_reader,
675 READ_PROC_FIRST_MB /* mb() orders reads */
676 | READ_PROC_READ_GEN,
677 READ_PROC_ACCESS_GEN) ->
678 /* smp_read_barrier_depends */
681 data_read[get_readerid()] =
682 READ_CACHED_VAR(rcu_data[ptr_read[get_readerid()]]);
683 PRODUCE_TOKENS(proc_urcu_reader, READ_PROC_ACCESS_GEN);
686 /* Note : we remove the nested memory barrier from the read unlock
687 * model, given it is not usually needed. The implementation has the barrier
688 * because the performance impact added by a branch in the common case does not
692 PROCEDURE_READ_UNLOCK(READ_UNLOCK_NESTED_BASE,
695 | READ_LOCK_NESTED_OUT,
696 READ_UNLOCK_NESTED_OUT);
699 :: CONSUME_TOKENS(proc_urcu_reader,
700 READ_PROC_ACCESS_GEN /* mb() orders reads */
701 | READ_PROC_READ_GEN /* mb() orders reads */
702 | READ_PROC_FIRST_MB /* mb() ordered */
703 | READ_LOCK_OUT /* post-dominant */
704 | READ_LOCK_NESTED_OUT /* post-dominant */
705 | READ_UNLOCK_NESTED_OUT,
706 READ_PROC_SECOND_MB) ->
708 PRODUCE_TOKENS(proc_urcu_reader, READ_PROC_SECOND_MB);
710 PROCEDURE_READ_UNLOCK(READ_UNLOCK_BASE,
711 READ_PROC_SECOND_MB /* mb() orders reads */
712 | READ_PROC_FIRST_MB /* mb() orders reads */
713 | READ_LOCK_NESTED_OUT /* RAW */
714 | READ_LOCK_OUT /* RAW */
715 | READ_UNLOCK_NESTED_OUT, /* RAW */
718 /* Unrolling loop : second consecutive lock */
719 /* reading urcu_active_readers, which have been written by
720 * READ_UNLOCK_OUT : RAW */
721 PROCEDURE_READ_LOCK(READ_LOCK_UNROLL_BASE,
722 READ_UNLOCK_OUT /* RAW */
723 | READ_PROC_SECOND_MB /* mb() orders reads */
724 | READ_PROC_FIRST_MB /* mb() orders reads */
725 | READ_LOCK_NESTED_OUT /* RAW */
726 | READ_LOCK_OUT /* RAW */
727 | READ_UNLOCK_NESTED_OUT, /* RAW */
728 READ_LOCK_OUT_UNROLL);
731 :: CONSUME_TOKENS(proc_urcu_reader,
732 READ_PROC_FIRST_MB /* mb() ordered */
733 | READ_PROC_SECOND_MB /* mb() ordered */
734 | READ_LOCK_OUT_UNROLL /* post-dominant */
735 | READ_LOCK_NESTED_OUT
737 | READ_UNLOCK_NESTED_OUT
739 READ_PROC_THIRD_MB) ->
741 PRODUCE_TOKENS(proc_urcu_reader, READ_PROC_THIRD_MB);
743 :: CONSUME_TOKENS(proc_urcu_reader,
744 READ_PROC_FIRST_MB /* mb() orders reads */
745 | READ_PROC_SECOND_MB /* mb() orders reads */
746 | READ_PROC_THIRD_MB, /* mb() orders reads */
747 READ_PROC_READ_GEN_UNROLL) ->
749 ptr_read[get_readerid()] = READ_CACHED_VAR(rcu_ptr);
750 PRODUCE_TOKENS(proc_urcu_reader, READ_PROC_READ_GEN_UNROLL);
752 :: CONSUME_TOKENS(proc_urcu_reader,
753 READ_PROC_READ_GEN_UNROLL
754 | READ_PROC_FIRST_MB /* mb() orders reads */
755 | READ_PROC_SECOND_MB /* mb() orders reads */
756 | READ_PROC_THIRD_MB, /* mb() orders reads */
757 READ_PROC_ACCESS_GEN_UNROLL) ->
758 /* smp_read_barrier_depends */
761 data_read[get_readerid()] =
762 READ_CACHED_VAR(rcu_data[ptr_read[get_readerid()]]);
763 PRODUCE_TOKENS(proc_urcu_reader, READ_PROC_ACCESS_GEN_UNROLL);
765 :: CONSUME_TOKENS(proc_urcu_reader,
766 READ_PROC_READ_GEN_UNROLL /* mb() orders reads */
767 | READ_PROC_ACCESS_GEN_UNROLL /* mb() orders reads */
768 | READ_PROC_FIRST_MB /* mb() ordered */
769 | READ_PROC_SECOND_MB /* mb() ordered */
770 | READ_PROC_THIRD_MB /* mb() ordered */
771 | READ_LOCK_OUT_UNROLL /* post-dominant */
772 | READ_LOCK_NESTED_OUT
774 | READ_UNLOCK_NESTED_OUT
776 READ_PROC_FOURTH_MB) ->
778 PRODUCE_TOKENS(proc_urcu_reader, READ_PROC_FOURTH_MB);
780 PROCEDURE_READ_UNLOCK(READ_UNLOCK_UNROLL_BASE,
781 READ_PROC_FOURTH_MB /* mb() orders reads */
782 | READ_PROC_THIRD_MB /* mb() orders reads */
783 | READ_LOCK_OUT_UNROLL /* RAW */
784 | READ_PROC_SECOND_MB /* mb() orders reads */
785 | READ_PROC_FIRST_MB /* mb() orders reads */
786 | READ_LOCK_NESTED_OUT /* RAW */
787 | READ_LOCK_OUT /* RAW */
788 | READ_UNLOCK_NESTED_OUT, /* RAW */
789 READ_UNLOCK_OUT_UNROLL);
790 :: CONSUME_TOKENS(proc_urcu_reader, READ_PROC_ALL_TOKENS, 0) ->
791 CLEAR_TOKENS(proc_urcu_reader, READ_PROC_ALL_TOKENS_CLEAR);
797 * Dependency between consecutive loops :
799 * WRITE_CACHED_VAR(urcu_active_readers[get_readerid()], tmp2 - 1)
800 * tmp = READ_CACHED_VAR(urcu_active_readers[get_readerid()]);
802 * _WHEN THE MB()s are in place_, they add full ordering of the
803 * generation pointer read wrt active reader count read, which ensures
804 * execution will not spill across loop execution.
805 * However, in the event mb()s are removed (execution using signal
806 * handler to promote barrier()() -> smp_mb()), nothing prevents one loop
807 * to spill its execution on other loop's execution.
830 active proctype urcu_reader()
837 assert(get_pid() < NR_PROCS);
843 * We do not test reader's progress here, because we are mainly
844 * interested in writer's progress. The reader never blocks
845 * anyway. We have to test for reader/writer's progress
846 * separately, otherwise we could think the writer is doing
847 * progress when it's blocked by an always progressing reader.
849 #ifdef READER_PROGRESS
852 urcu_one_read(i, j, nest_i, tmp, tmp2);
856 /* no name clash please */
857 #undef proc_urcu_reader
860 /* Model the RCU update process. */
863 * Bit encoding, urcu_writer :
864 * Currently only supports one reader.
867 int _proc_urcu_writer;
868 #define proc_urcu_writer _proc_urcu_writer
870 #define WRITE_PROD_NONE (1 << 0)
872 #define WRITE_PROC_FIRST_MB (1 << 1)
875 #define WRITE_PROC_FIRST_READ_GP (1 << 2)
876 #define WRITE_PROC_FIRST_WRITE_GP (1 << 3)
877 #define WRITE_PROC_FIRST_WAIT (1 << 4)
878 #define WRITE_PROC_FIRST_WAIT_LOOP (1 << 5)
881 #define WRITE_PROC_SECOND_READ_GP (1 << 6)
882 #define WRITE_PROC_SECOND_WRITE_GP (1 << 7)
883 #define WRITE_PROC_SECOND_WAIT (1 << 8)
884 #define WRITE_PROC_SECOND_WAIT_LOOP (1 << 9)
886 #define WRITE_PROC_SECOND_MB (1 << 10)
888 #define WRITE_PROC_ALL_TOKENS (WRITE_PROD_NONE \
889 | WRITE_PROC_FIRST_MB \
890 | WRITE_PROC_FIRST_READ_GP \
891 | WRITE_PROC_FIRST_WRITE_GP \
892 | WRITE_PROC_FIRST_WAIT \
893 | WRITE_PROC_SECOND_READ_GP \
894 | WRITE_PROC_SECOND_WRITE_GP \
895 | WRITE_PROC_SECOND_WAIT \
896 | WRITE_PROC_SECOND_MB)
898 #define WRITE_PROC_ALL_TOKENS_CLEAR ((1 << 11) - 1)
901 * Mutexes are implied around writer execution. A single writer at a time.
903 active proctype urcu_writer()
906 byte tmp, tmp2, tmpa;
907 byte cur_data = 0, old_data, loop_nr = 0;
908 byte cur_gp_val = 0; /*
909 * Keep a local trace of the current parity so
910 * we don't add non-existing dependencies on the global
911 * GP update. Needed to test single flip case.
916 assert(get_pid() < NR_PROCS);
920 #ifdef WRITER_PROGRESS
923 loop_nr = loop_nr + 1;
925 /* TODO : add instruction scheduling to this code path to test
926 * missing wmb effect. */
927 /* smp_wmb() ensures order of the following instructions */
929 cur_data = (cur_data + 1) % SLAB_SIZE;
931 WRITE_CACHED_VAR(rcu_data[cur_data], WINE);
937 old_data = READ_CACHED_VAR(rcu_ptr);
939 WRITE_CACHED_VAR(rcu_ptr, cur_data); /* rcu_assign_pointer() */
943 PRODUCE_TOKENS(proc_urcu_writer, WRITE_PROD_NONE);
946 PRODUCE_TOKENS(proc_urcu_writer, WRITE_PROC_FIRST_MB);
947 PRODUCE_TOKENS(proc_urcu_writer, WRITE_PROC_SECOND_MB);
951 PRODUCE_TOKENS(proc_urcu_writer, WRITE_PROC_SECOND_READ_GP);
952 PRODUCE_TOKENS(proc_urcu_writer, WRITE_PROC_SECOND_WRITE_GP);
953 PRODUCE_TOKENS(proc_urcu_writer, WRITE_PROC_SECOND_WAIT);
954 /* For single flip, we need to know the current parity */
955 cur_gp_val = cur_gp_val ^ RCU_GP_CTR_BIT;
961 :: CONSUME_TOKENS(proc_urcu_writer,
963 WRITE_PROC_FIRST_MB) ->
966 PRODUCE_TOKENS(proc_urcu_writer, WRITE_PROC_FIRST_MB);
969 :: CONSUME_TOKENS(proc_urcu_writer,
971 WRITE_PROC_FIRST_READ_GP) ->
972 tmpa = READ_CACHED_VAR(urcu_gp_ctr);
973 PRODUCE_TOKENS(proc_urcu_writer, WRITE_PROC_FIRST_READ_GP);
974 :: CONSUME_TOKENS(proc_urcu_writer,
975 WRITE_PROC_FIRST_MB | WRITE_PROC_FIRST_READ_GP,
976 WRITE_PROC_FIRST_WRITE_GP) ->
978 WRITE_CACHED_VAR(urcu_gp_ctr, tmpa ^ RCU_GP_CTR_BIT);
979 PRODUCE_TOKENS(proc_urcu_writer, WRITE_PROC_FIRST_WRITE_GP);
981 :: CONSUME_TOKENS(proc_urcu_writer,
982 //WRITE_PROC_FIRST_WRITE_GP /* TEST ADDING SYNC CORE */
983 WRITE_PROC_FIRST_MB, /* can be reordered before/after flips */
984 WRITE_PROC_FIRST_WAIT | WRITE_PROC_FIRST_WAIT_LOOP) ->
986 /* ONLY WAITING FOR READER 0 */
987 tmp2 = READ_CACHED_VAR(urcu_active_readers[0]);
989 /* In normal execution, we are always starting by
990 * waiting for the even parity.
992 cur_gp_val = RCU_GP_CTR_BIT;
995 :: (tmp2 & RCU_GP_CTR_NEST_MASK)
996 && ((tmp2 ^ cur_gp_val) & RCU_GP_CTR_BIT) ->
997 PRODUCE_TOKENS(proc_urcu_writer, WRITE_PROC_FIRST_WAIT_LOOP);
999 PRODUCE_TOKENS(proc_urcu_writer, WRITE_PROC_FIRST_WAIT);
1002 :: CONSUME_TOKENS(proc_urcu_writer,
1003 //WRITE_PROC_FIRST_WRITE_GP /* TEST ADDING SYNC CORE */
1004 WRITE_PROC_FIRST_WRITE_GP
1005 | WRITE_PROC_FIRST_READ_GP
1006 | WRITE_PROC_FIRST_WAIT_LOOP
1007 | WRITE_PROC_FIRST_MB, /* can be reordered before/after flips */
1009 #ifndef GEN_ERROR_WRITER_PROGRESS
1015 /* This instruction loops to WRITE_PROC_FIRST_WAIT */
1016 CLEAR_TOKENS(proc_urcu_writer, WRITE_PROC_FIRST_WAIT_LOOP | WRITE_PROC_FIRST_WAIT);
1019 :: CONSUME_TOKENS(proc_urcu_writer,
1020 WRITE_PROC_FIRST_WAIT /* Control dependency : need to branch out of
1021 * the loop to execute the next flip (CHECK) */
1022 | WRITE_PROC_FIRST_WRITE_GP
1023 | WRITE_PROC_FIRST_READ_GP
1024 | WRITE_PROC_FIRST_MB,
1025 WRITE_PROC_SECOND_READ_GP) ->
1027 tmpa = READ_CACHED_VAR(urcu_gp_ctr);
1028 PRODUCE_TOKENS(proc_urcu_writer, WRITE_PROC_SECOND_READ_GP);
1029 :: CONSUME_TOKENS(proc_urcu_writer,
1031 | WRITE_PROC_FIRST_READ_GP
1032 | WRITE_PROC_FIRST_WRITE_GP
1033 | WRITE_PROC_SECOND_READ_GP,
1034 WRITE_PROC_SECOND_WRITE_GP) ->
1036 WRITE_CACHED_VAR(urcu_gp_ctr, tmpa ^ RCU_GP_CTR_BIT);
1037 PRODUCE_TOKENS(proc_urcu_writer, WRITE_PROC_SECOND_WRITE_GP);
1039 :: CONSUME_TOKENS(proc_urcu_writer,
1040 //WRITE_PROC_FIRST_WRITE_GP /* TEST ADDING SYNC CORE */
1041 WRITE_PROC_FIRST_WAIT
1042 | WRITE_PROC_FIRST_MB, /* can be reordered before/after flips */
1043 WRITE_PROC_SECOND_WAIT | WRITE_PROC_SECOND_WAIT_LOOP) ->
1045 /* ONLY WAITING FOR READER 0 */
1046 tmp2 = READ_CACHED_VAR(urcu_active_readers[0]);
1048 :: (tmp2 & RCU_GP_CTR_NEST_MASK)
1049 && ((tmp2 ^ 0) & RCU_GP_CTR_BIT) ->
1050 PRODUCE_TOKENS(proc_urcu_writer, WRITE_PROC_SECOND_WAIT_LOOP);
1052 PRODUCE_TOKENS(proc_urcu_writer, WRITE_PROC_SECOND_WAIT);
1055 :: CONSUME_TOKENS(proc_urcu_writer,
1056 //WRITE_PROC_FIRST_WRITE_GP /* TEST ADDING SYNC CORE */
1057 WRITE_PROC_SECOND_WRITE_GP
1058 | WRITE_PROC_FIRST_WRITE_GP
1059 | WRITE_PROC_SECOND_READ_GP
1060 | WRITE_PROC_FIRST_READ_GP
1061 | WRITE_PROC_SECOND_WAIT_LOOP
1062 | WRITE_PROC_FIRST_MB, /* can be reordered before/after flips */
1064 #ifndef GEN_ERROR_WRITER_PROGRESS
1070 /* This instruction loops to WRITE_PROC_SECOND_WAIT */
1071 CLEAR_TOKENS(proc_urcu_writer, WRITE_PROC_SECOND_WAIT_LOOP | WRITE_PROC_SECOND_WAIT);
1074 :: CONSUME_TOKENS(proc_urcu_writer,
1075 WRITE_PROC_FIRST_WAIT
1076 | WRITE_PROC_SECOND_WAIT
1077 | WRITE_PROC_FIRST_READ_GP
1078 | WRITE_PROC_SECOND_READ_GP
1079 | WRITE_PROC_FIRST_WRITE_GP
1080 | WRITE_PROC_SECOND_WRITE_GP
1081 | WRITE_PROC_FIRST_MB,
1082 WRITE_PROC_SECOND_MB) ->
1085 PRODUCE_TOKENS(proc_urcu_writer, WRITE_PROC_SECOND_MB);
1087 :: CONSUME_TOKENS(proc_urcu_writer, WRITE_PROC_ALL_TOKENS, 0) ->
1088 CLEAR_TOKENS(proc_urcu_writer, WRITE_PROC_ALL_TOKENS_CLEAR);
1094 WRITE_CACHED_VAR(rcu_data[old_data], POISON);
1099 * Given the reader loops infinitely, let the writer also busy-loop
1100 * with progress here so, with weak fairness, we can test the
1101 * writer's progress.
1106 #ifdef WRITER_PROGRESS
1112 /* Non-atomic parts of the loop */
1115 smp_mb_send(i, j, 1);
1116 goto smp_mb_send1_end;
1117 #ifndef GEN_ERROR_WRITER_PROGRESS
1119 smp_mb_send(i, j, 2);
1120 goto smp_mb_send2_end;
1122 smp_mb_send(i, j, 3);
1123 goto smp_mb_send3_end;
1126 smp_mb_send(i, j, 4);
1127 goto smp_mb_send4_end;
1132 /* no name clash please */
1133 #undef proc_urcu_writer
1136 /* Leave after the readers and writers so the pid count is ok. */
1141 INIT_CACHED_VAR(urcu_gp_ctr, 1, j);
1142 INIT_CACHED_VAR(rcu_ptr, 0, j);
1146 :: i < NR_READERS ->
1147 INIT_CACHED_VAR(urcu_active_readers[i], 0, j);
1150 :: i >= NR_READERS -> break
1152 INIT_CACHED_VAR(rcu_data[0], WINE, j);
1156 INIT_CACHED_VAR(rcu_data[i], POISON, j);
1158 :: i >= SLAB_SIZE -> break