* volatile access to implement rcu_dereference rather than
* memory_order_consume load from the C11/C++11 standards.
*
+ * CAUTION: If URCU_DEREFERENCE_USE_VOLATILE is defined, pointers comparisons
+ * _must_ be done using the `cmm_ptr_eq' static inline helper function to
+ * prevent the compiler optimizations from losing addresses dependencies,
+ * resulting in the loss of the publication guarantee.
+ *
* This may improve performance on weakly-ordered architectures where
* the compiler implements memory_order_consume as a
* memory_order_acquire, which is stricter than required by the
* standard.
*
* This macro is less than 10 lines long.  The intent is that this macro
* meets the 10-line criterion in LGPL, allowing this function to be
* expanded directly in non-LGPL code.
*/
-
-#if !defined (URCU_DEREFERENCE_USE_VOLATILE) && \
- ((defined (__cplusplus) && __cplusplus >= 201103L) || \
- (defined (__STDC_VERSION__) && __STDC_VERSION__ >= 201112L))
-# define __URCU_DEREFERENCE_USE_ATOMIC_CONSUME
+#ifdef URCU_DEREFERENCE_USE_VOLATILE
+/*
+ * Volatile flavor: order the load with cmm_smp_rmc(), then perform a
+ * volatile access of `p'.  With this flavor, pointer comparisons _must_
+ * go through cmm_ptr_eq() to preserve address dependencies (see the
+ * CAUTION note above).
+ */
+# define _rcu_dereference(p) \
+	__extension__ \
+	({ \
+		cmm_smp_rmc(); \
+		CMM_ACCESS_ONCE(p); \
+	})
+#else
+/* Default flavor: atomic load with memory_order_consume semantics. */
+# define _rcu_dereference(p) \
+	uatomic_load(&(p), CMM_CONSUME)
+#endif
-/*
- * If p is const (the pointer itself, not what it points to), using
- * __typeof__(p) would declare a const variable, leading to
- * -Wincompatible-pointer-types errors. Using the statement expression
- * makes it an rvalue and gets rid of the const-ness.
+/**
+ * Compare pointers A and B, but prevent the compiler from using this knowledge
+ * for optimization.
+ *
+ * This may introduce some overhead. For example, compilers might allocate
+ * more registers, resulting in register spilling.
+ *
+ * For example, in the following code, `cmm_ptr_eq' prevents the compiler from
+ * re-using the dereference of `a' at the first call of `do_func()' for the second
+ * call. If a simple equality comparison was used, the compiler could use the
+ * value of `global->x' across two RCU critical regions.
+ *
+ * struct work {
+ * long x;
+ * }
+ *
+ * struct work *global;
+ *
+ * void func(void)
+ * {
+ * struct work *a, *b;
+ *
+ * rcu_read_lock();
+ * a = rcu_dereference(global);
+ * if (a)
+ * do_func(a->x);
+ * rcu_read_unlock();
+ *
+ * some_stuff();
+ *
+ * rcu_read_lock();
+ * b = rcu_dereference(global);
+ * if (b && cmm_ptr_eq(a, b)) // force to use b->x and not a->x
+ * do_func(b->x);
+ * rcu_read_unlock();
+ * }
+ *
+ * Consider what could happen if such compiler optimization was done while the
+ * following mutator exists. The mutator removes a `work' node that was
+ * previously stored in a global location by storing NULL. It then calls
+ * synchronize_rcu() to ensure no readers can see the node. From there, a new
+ * work is created for the node and the node is committed with
+ * rcu_assign_pointer(), thus the same pointer is now visible to readers. This
+ * fictional mutator is purposely trying to create an ABA problem for the sake
+ * of the argument:
+ *
+ * void mutator(void)
+ * {
+ * struct work *work;
+ *
+ * work = global;
+ * rcu_assign_pointer(global, NULL);
+ *         synchronize_rcu();
+ * work->x = new_work();
+ * rcu_assign_pointer(global, work);
+ * }
+ *
+ * Then, the following execution could happen, where the new work assigned is
+ * not executed. In other words, the compiler optimizations can introduce an
+ * ABA problem, defeating one of RCU's purposes.
+ *
+ * Worker: Mutator:
+ * let a, b let work = global
+ * let tmp
+ * rcu_read_lock()
+ * a = rcu_dereference(global)
+ * if (a) {
+ * tmp = a->x
+ * do_func(tmp)
+ * }
+ * rcu_read_unlock()
+ * do_stuff() ...
+ * ... rcu_assign_pointer(global, NULL)
+ * ... rcu_synchronize()
+ * ... work->x = new_work()
+ * ... rcu_assign_pointer(global, work)
+ * rcu_read_lock()
+ * b = rcu_dereference(global)
+ * if (b && (a == b)) // BUGGY !!!
+ * do_func(tmp)
+ * rcu_read_unlock()
+ *
+ * In other words, the publication guarantee is lost here. The store to
+ * `work->x' in the mutator is supposed to be observable by the second
+ * rcu_dereference() in the worker, because of the matching
+ * rcu_assign_pointer(). But due to the equality comparison, the compiler
+ * decides to reuse the prior rcu_dereference().
*/
-# define _rcu_dereference(p) \
- uatomic_load(&(p), CMM_CONSUME)
+static inline int cmm_ptr_eq(const void *a, const void *b)
+{
+	/*
+	 * Make both pointer values opaque to the compiler before the
+	 * comparison, so a successful test cannot be used to substitute
+	 * one pointer (and prior loads through it) for the other.
+	 */
+	_CMM_OPTIMIZER_HIDE_VAR(a);
+	_CMM_OPTIMIZER_HIDE_VAR(b);
+
+	return a == b;
+}
+
/**
* _rcu_cmpxchg_pointer - same as rcu_assign_pointer, but tests if the pointer
* is as expected by "old". If succeeds, returns the previous pointer to the