* output exchanged. Therefore, i post-dominating j ensures that every path
* passing by j will pass by i before reaching the output.
*
+ * Prefetch and speculative execution
+ *
+ * If an instruction depends on the result of a previous branch, but it does not
+ * have side-effects, it can be executed before the branch result is known.
+ * However, it must be restarted if a core-synchronizing instruction is issued.
+ * Note that instructions which depend on the speculative instruction result
+ * but that have side-effects must depend on the branch completion in addition
+ * to the speculatively executed instruction.
+ *
* Other considerations
*
* Note about "volatile" keyword dependency : The compiler will order volatile
* Only Alpha has out-of-order cache bank loads. Other architectures (intel,
* powerpc, arm) ensure that dependent reads won't be reordered (cf.
* http://www.linuxjournal.com/article/8212).
+ */
/* Define HAVE_OOO_CACHE_READ only when the model is built for Alpha
 * (ARCH_ALPHA). NOTE(review): the code keyed on HAVE_OOO_CACHE_READ is not
 * visible in this section; presumably it enables out-of-order cache reads in
 * the model - confirm at the use sites. */
#ifdef ARCH_ALPHA
#define HAVE_OOO_CACHE_READ
#endif
#else
#define smp_mb_send(i, j, progressid) smp_mb(i)
-#define smp_mb_reader smp_mb(i)
+#define smp_mb_reader(i, j) smp_mb(i)
#define smp_mb_recv(i, j)
#endif
/* Single-bit token flags for the reader token set (proc_urcu_reader).
 * They are not used directly: the read-lock procedure shifts them left by its
 * `base` argument before passing them to CONSUME_TOKENS / PRODUCE_TOKENS. */
#define READ_PROD_B_IF_FALSE (1 << 2)
#define READ_PROD_C_IF_TRUE_READ (1 << 3)
-#define PROCEDURE_READ_LOCK(base, consumetoken, producetoken) \
- :: CONSUME_TOKENS(proc_urcu_reader, consumetoken, READ_PROD_A_READ << base) -> \
+#define PROCEDURE_READ_LOCK(base, consumetoken, consumetoken2, producetoken) \
+ :: CONSUME_TOKENS(proc_urcu_reader, (consumetoken | consumetoken2), READ_PROD_A_READ << base) -> \
ooo_mem(i); \
tmp = READ_CACHED_VAR(urcu_active_readers[get_readerid()]); \
PRODUCE_TOKENS(proc_urcu_reader, READ_PROD_A_READ << base); \
PRODUCE_TOKENS(proc_urcu_reader, READ_PROD_B_IF_FALSE << base); \
fi; \
/* IF TRUE */ \
- :: CONSUME_TOKENS(proc_urcu_reader, READ_PROD_B_IF_TRUE << base, \
+ :: CONSUME_TOKENS(proc_urcu_reader, consumetoken, /* prefetch */ \
READ_PROD_C_IF_TRUE_READ << base) -> \
ooo_mem(i); \
tmp2 = READ_CACHED_VAR(urcu_gp_ctr); \
PRODUCE_TOKENS(proc_urcu_reader, READ_PROD_C_IF_TRUE_READ << base); \
:: CONSUME_TOKENS(proc_urcu_reader, \
- (READ_PROD_C_IF_TRUE_READ /* pre-dominant */ \
+ (READ_PROD_B_IF_TRUE \
+ | READ_PROD_C_IF_TRUE_READ /* pre-dominant */ \
| READ_PROD_A_READ) << base, /* WAR */ \
producetoken) -> \
ooo_mem(i); \
consumetoken, \
READ_PROC_READ_UNLOCK << base) -> \
ooo_mem(i); \
- tmp2 = READ_CACHED_VAR(urcu_active_readers[get_readerid()]); \
+ tmp = READ_CACHED_VAR(urcu_active_readers[get_readerid()]); \
PRODUCE_TOKENS(proc_urcu_reader, READ_PROC_READ_UNLOCK << base); \
:: CONSUME_TOKENS(proc_urcu_reader, \
consumetoken \
| (READ_PROC_READ_UNLOCK << base), /* WAR */ \
producetoken) -> \
ooo_mem(i); \
- WRITE_CACHED_VAR(urcu_active_readers[get_readerid()], tmp2 - 1); \
+ WRITE_CACHED_VAR(urcu_active_readers[get_readerid()], tmp - 1); \
PRODUCE_TOKENS(proc_urcu_reader, producetoken); \
skip
atomic {
if
- PROCEDURE_READ_LOCK(READ_LOCK_BASE, READ_PROD_NONE, READ_LOCK_OUT);
+ PROCEDURE_READ_LOCK(READ_LOCK_BASE, READ_PROD_NONE, 0, READ_LOCK_OUT);
:: CONSUME_TOKENS(proc_urcu_reader,
READ_LOCK_OUT, /* post-dominant */
smp_mb_reader(i, j);
PRODUCE_TOKENS(proc_urcu_reader, READ_PROC_FIRST_MB);
- PROCEDURE_READ_LOCK(READ_LOCK_NESTED_BASE, READ_PROC_FIRST_MB | READ_LOCK_OUT,
+ PROCEDURE_READ_LOCK(READ_LOCK_NESTED_BASE, READ_PROC_FIRST_MB, READ_LOCK_OUT,
READ_LOCK_NESTED_OUT);
:: CONSUME_TOKENS(proc_urcu_reader,
/* reading urcu_active_readers, which has been written by
* READ_UNLOCK_OUT : RAW */
PROCEDURE_READ_LOCK(READ_LOCK_UNROLL_BASE,
- READ_UNLOCK_OUT /* RAW */
- | READ_PROC_SECOND_MB /* mb() orders reads */
- | READ_PROC_FIRST_MB /* mb() orders reads */
- | READ_LOCK_NESTED_OUT /* RAW */
+ READ_PROC_SECOND_MB /* mb() orders reads */
+ | READ_PROC_FIRST_MB, /* mb() orders reads */
+ READ_LOCK_NESTED_OUT /* RAW */
| READ_LOCK_OUT /* RAW */
- | READ_UNLOCK_NESTED_OUT, /* RAW */
+ | READ_UNLOCK_NESTED_OUT /* RAW */
+ | READ_UNLOCK_OUT, /* RAW */
READ_LOCK_OUT_UNROLL);
PRODUCE_TOKENS(proc_urcu_writer, WRITE_PROC_FIRST_WRITE_GP);
:: CONSUME_TOKENS(proc_urcu_writer,
- //WRITE_PROC_FIRST_WRITE_GP /* TEST ADDING SYNC CORE */
+ //WRITE_PROC_FIRST_WRITE_GP | /* TEST ADDING SYNC CORE */
WRITE_PROC_FIRST_MB, /* can be reordered before/after flips */
WRITE_PROC_FIRST_WAIT | WRITE_PROC_FIRST_WAIT_LOOP) ->
ooo_mem(i);
+ //smp_mb(i); /* TEST */
/* ONLY WAITING FOR READER 0 */
tmp2 = READ_CACHED_VAR(urcu_active_readers[0]);
#ifndef SINGLE_FLIP
#ifndef GEN_ERROR_WRITER_PROGRESS
goto smp_mb_send2;
smp_mb_send2_end:
+ /* The memory barrier will invalidate the
+ * second read done as prefetching. Note that all
+ * instructions with side-effects depending on
+ * WRITE_PROC_SECOND_READ_GP should also depend on
+ * completion of this busy-waiting loop. */
+ CLEAR_TOKENS(proc_urcu_writer, WRITE_PROC_SECOND_READ_GP);
#else
ooo_mem(i);
#endif
/* second flip */
:: CONSUME_TOKENS(proc_urcu_writer,
- WRITE_PROC_FIRST_WAIT /* Control dependency : need to branch out of
- * the loop to execute the next flip (CHECK) */
- | WRITE_PROC_FIRST_WRITE_GP
+ //WRITE_PROC_FIRST_WAIT | //test /* no dependency. Could pre-fetch, no side-effect. */
+ WRITE_PROC_FIRST_WRITE_GP
| WRITE_PROC_FIRST_READ_GP
| WRITE_PROC_FIRST_MB,
WRITE_PROC_SECOND_READ_GP) ->
ooo_mem(i);
+ //smp_mb(i); /* TEST */
tmpa = READ_CACHED_VAR(urcu_gp_ctr);
PRODUCE_TOKENS(proc_urcu_writer, WRITE_PROC_SECOND_READ_GP);
:: CONSUME_TOKENS(proc_urcu_writer,
- WRITE_PROC_FIRST_MB
+ WRITE_PROC_FIRST_WAIT /* dependency on first wait, because this
+ * instruction has globally observable
+ * side-effects.
+ */
+ | WRITE_PROC_FIRST_MB
| WRITE_PROC_WMB
| WRITE_PROC_FIRST_READ_GP
| WRITE_PROC_FIRST_WRITE_GP
PRODUCE_TOKENS(proc_urcu_writer, WRITE_PROC_SECOND_WRITE_GP);
:: CONSUME_TOKENS(proc_urcu_writer,
- //WRITE_PROC_FIRST_WRITE_GP /* TEST ADDING SYNC CORE */
+ //WRITE_PROC_FIRST_WRITE_GP | /* TEST ADDING SYNC CORE */
WRITE_PROC_FIRST_WAIT
| WRITE_PROC_FIRST_MB, /* can be reordered before/after flips */
WRITE_PROC_SECOND_WAIT | WRITE_PROC_SECOND_WAIT_LOOP) ->
ooo_mem(i);
+ //smp_mb(i); /* TEST */
/* ONLY WAITING FOR READER 0 */
tmp2 = READ_CACHED_VAR(urcu_active_readers[0]);
if
fi;
:: CONSUME_TOKENS(proc_urcu_writer,
- //WRITE_PROC_FIRST_WRITE_GP /* TEST ADDING SYNC CORE */
+ //WRITE_PROC_FIRST_WRITE_GP | /* TEST ADDING SYNC CORE */
WRITE_PROC_SECOND_WRITE_GP
| WRITE_PROC_FIRST_WRITE_GP
| WRITE_PROC_SECOND_READ_GP