Add cmm_emit_legacy_smp_mb()

author Olivier Dion <odion@efficios.com>

Mon, 29 May 2023 15:21:11 +0000 (11:21 -0400)

committer Mathieu Desnoyers <mathieu.desnoyers@efficios.com>

Mon, 14 Aug 2023 19:46:29 +0000 (15:46 -0400)
author Olivier Dion <odion@efficios.com>
Mon, 29 May 2023 15:21:11 +0000 (11:21 -0400)
committer Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Mon, 14 Aug 2023 19:46:29 +0000 (15:46 -0400)
diff --git a/configure.ac b/configure.ac

index 7045cdc066660ed78fe93385fbbef89821fcbcd2..15055d6ca2f1d639bcc22157310dfc0fa9a8f35b 100644 (file)
--- a/configure.ac
+++ b/configure.ac
@@ -239,6 +239,11 @@ AE_FEATURE([cds-lfht-iter-debug], [Enable extra debugging checks for lock-free h
  AE_FEATURE_DEFAULT_DISABLE
  AE_FEATURE([compiler-atomic-builtins], [Enable the use of compiler atomic builtins.])
  
+# emit legacy memory barriers
+# Enabled by default
+AE_FEATURE_DEFAULT_ENABLE
+AE_FEATURE([legacy-mb], [Disable legacy memory barriers.])
+
  # When given, add -Werror to WARN_CFLAGS and WARN_CXXFLAGS.
  # Disabled by default
  AE_FEATURE_DEFAULT_DISABLE
@@ -272,6 +277,10 @@ AE_IF_FEATURE_ENABLED([compiler-atomic-builtins], [
    AC_DEFINE([CONFIG_RCU_USE_ATOMIC_BUILTINS], [1], [Use compiler atomic builtins.])
  ])
  
+AE_IF_FEATURE_ENABLED([legacy-mb], [
+  AC_DEFINE([CONFIG_RCU_EMIT_LEGACY_MB], [1], [Emit legacy memory barriers that were documented in the APIs.])
+])
+
  ##                                                                          ##
  ## Set automake variables for optional feature conditionnals in Makefile.am ##
  ##                                                                          ##
@@ -390,6 +399,10 @@ AE_PPRINT_PROP_BOOL([Multi-flavor support], 1)
  AE_IS_FEATURE_ENABLED([compiler-atomic-builtins]) && value=1 || value=0
  AE_PPRINT_PROP_BOOL([Use compiler atomic builtins], $value)
  
+# legacy memory barriers
+AE_IS_FEATURE_ENABLED([legacy-mb]) && value=1 || value=0
+AE_PPRINT_PROP_BOOL([Emit legacy memory barriers], $value)
+
  report_bindir="`eval eval echo $bindir`"
  report_libdir="`eval eval echo $libdir`"
  
diff --git a/include/urcu/arch.h b/include/urcu/arch.h

index 45ba6a222fe507b5910a307e667d72d27545672e..717d79c6a2c6020c27baf94bcc7b099995245b9f 100644 (file)
--- a/include/urcu/arch.h
+++ b/include/urcu/arch.h
@@ -155,5 +155,11 @@
  #error "Cannot build: unrecognized architecture, see <urcu/arch.h>."
  #endif
  
+#ifdef CONFIG_RCU_EMIT_LEGACY_MB
+# define cmm_emit_legacy_smp_mb() cmm_smp_mb()
+#else
+# define cmm_emit_legacy_smp_mb() do { } while (0)
+#endif
+
  
  #endif /* _URCU_ARCH_H */
diff --git a/include/urcu/config.h.in b/include/urcu/config.h.in

index aa1d6c92b24cb2bff2d97751aaac5cad0c4f7da1..473d7a2ec49a076225f5096e7ae65315cfc04aba 100644 (file)
--- a/include/urcu/config.h.in
+++ b/include/urcu/config.h.in
@@ -26,6 +26,9 @@
  /* Uatomic API uses atomic builtins. */
  #undef CONFIG_RCU_USE_ATOMIC_BUILTINS
  
+/* Emit legacy memory barriers? */
+#undef CONFIG_RCU_EMIT_LEGACY_MB
+
  /* Expose multi-flavor support */
  #define CONFIG_RCU_HAVE_MULTIFLAVOR 1
  
diff --git a/include/urcu/static/lfstack.h b/include/urcu/static/lfstack.h

index 75db75ea3652adc37dea3835a89c1a03d1cfdb6b..d7e70d4966d6bab1f5f040c634a025c84f7baa99 100644 (file)
--- a/include/urcu/static/lfstack.h
+++ b/include/urcu/static/lfstack.h
@@ -100,7 +100,7 @@ bool ___cds_lfs_empty_head(struct cds_lfs_head *head)
  static inline
  bool _cds_lfs_empty(cds_lfs_stack_ptr_t s)
  {
-       return ___cds_lfs_empty_head(CMM_LOAD_SHARED(s._s->head));
+       return ___cds_lfs_empty_head(uatomic_load(&s._s->head, CMM_RELAXED));
  }
  
  /*
@@ -108,6 +108,8 @@ bool _cds_lfs_empty(cds_lfs_stack_ptr_t s)
   *
   * Does not require any synchronization with other push nor pop.
   *
+ * Operations before push are consistent when observed after associated pop.
+ *
   * Lock-free stack push is not subject to ABA problem, so no need to
   * take the RCU read-side lock. Even if "head" changes between two
   * uatomic_cmpxchg() invocations here (being popped, and then pushed
@@ -153,7 +155,9 @@ bool _cds_lfs_push(cds_lfs_stack_ptr_t u_s,
                  * uatomic_cmpxchg() implicit memory barrier orders earlier
                  * stores to node before publication.
                  */
-               head = uatomic_cmpxchg(&s->head, old_head, new_head);
+               cmm_emit_legacy_smp_mb();
+               head = uatomic_cmpxchg_mo(&s->head, old_head, new_head,
+                                       CMM_SEQ_CST, CMM_SEQ_CST);
                 if (old_head == head)
                         break;
         }
@@ -165,6 +169,8 @@ bool _cds_lfs_push(cds_lfs_stack_ptr_t u_s,
   *
   * Returns NULL if stack is empty.
   *
+ * Operations after pop are consistent when observed before associated push.
+ *
   * __cds_lfs_pop needs to be synchronized using one of the following
   * techniques:
   *
@@ -189,7 +195,7 @@ struct cds_lfs_node *___cds_lfs_pop(cds_lfs_stack_ptr_t u_s)
                 struct cds_lfs_head *head, *next_head;
                 struct cds_lfs_node *next;
  
-               head = _CMM_LOAD_SHARED(s->head);
+               head = uatomic_load(&s->head, CMM_CONSUME);
                 if (___cds_lfs_empty_head(head))
                         return NULL;    /* Empty stack */
  
@@ -198,12 +204,14 @@ struct cds_lfs_node *___cds_lfs_pop(cds_lfs_stack_ptr_t u_s)
                  * memory barrier before uatomic_cmpxchg() in
                  * cds_lfs_push.
                  */
-               cmm_smp_read_barrier_depends();
-               next = _CMM_LOAD_SHARED(head->node.next);
+               next = uatomic_load(&head->node.next, CMM_RELAXED);
                 next_head = caa_container_of(next,
                                 struct cds_lfs_head, node);
-               if (uatomic_cmpxchg(&s->head, head, next_head) == head)
+               if (uatomic_cmpxchg_mo(&s->head, head, next_head,
+                                       CMM_SEQ_CST, CMM_SEQ_CST) == head){
+                       cmm_emit_legacy_smp_mb();
                         return &head->node;
+               }
                 /* busy-loop if head changed under us */
         }
  }
@@ -231,6 +239,7 @@ static inline
  struct cds_lfs_head *___cds_lfs_pop_all(cds_lfs_stack_ptr_t u_s)
  {
         struct __cds_lfs_stack *s = u_s._s;
+       struct cds_lfs_head *head;
  
         /*
          * Implicit memory barrier after uatomic_xchg() matches implicit
@@ -242,7 +251,9 @@ struct cds_lfs_head *___cds_lfs_pop_all(cds_lfs_stack_ptr_t u_s)
          * taking care to order writes to each node prior to the full
          * memory barrier after this uatomic_xchg().
          */
-       return uatomic_xchg(&s->head, NULL);
+       head = uatomic_xchg_mo(&s->head, NULL, CMM_SEQ_CST);
+       cmm_emit_legacy_smp_mb();
+       return head;
  }
  
  /*
diff --git a/include/urcu/static/rculfqueue.h b/include/urcu/static/rculfqueue.h

index c0daffea78b5f1d2f2b65fb7baab205ae8da58dd..03c4ecdef76c6ee55102e5b36caaea5dee954841 100644 (file)
--- a/include/urcu/static/rculfqueue.h
+++ b/include/urcu/static/rculfqueue.h
@@ -134,26 +134,29 @@ void _cds_lfq_enqueue_rcu(struct cds_lfq_queue_rcu *q,
          * uatomic_cmpxchg() implicit memory barrier orders earlier stores to
          * node before publication.
          */
-
         for (;;) {
                 struct cds_lfq_node_rcu *tail, *next;
  
                 tail = rcu_dereference(q->tail);
-               next = uatomic_cmpxchg(&tail->next, NULL, node);
+               cmm_emit_legacy_smp_mb();
+               next = uatomic_cmpxchg_mo(&tail->next, NULL, node,
+                                       CMM_SEQ_CST, CMM_SEQ_CST);
                 if (next == NULL) {
                         /*
                          * Tail was at the end of queue, we successfully
                          * appended to it. Now move tail (another
                          * enqueue might beat us to it, that's fine).
                          */
-                       (void) uatomic_cmpxchg(&q->tail, tail, node);
+                       (void) uatomic_cmpxchg_mo(&q->tail, tail, node,
+                                               CMM_SEQ_CST, CMM_SEQ_CST);
                         return;
                 } else {
                         /*
                          * Failure to append to current tail.
                          * Help moving tail further and retry.
                          */
-                       (void) uatomic_cmpxchg(&q->tail, tail, next);
+                       (void) uatomic_cmpxchg_mo(&q->tail, tail, next,
+                                               CMM_SEQ_CST, CMM_SEQ_CST);
                         continue;
                 }
         }
@@ -197,7 +200,8 @@ struct cds_lfq_node_rcu *_cds_lfq_dequeue_rcu(struct cds_lfq_queue_rcu *q)
                         enqueue_dummy(q);
                         next = rcu_dereference(head->next);
                 }
-               if (uatomic_cmpxchg(&q->head, head, next) != head)
+               if (uatomic_cmpxchg_mo(&q->head, head, next,
+                                       CMM_SEQ_CST, CMM_SEQ_CST) != head)
                         continue;       /* Concurrently pushed. */
                 if (head->dummy) {
                         /* Free dummy after grace period. */
diff --git a/include/urcu/static/rculfstack.h b/include/urcu/static/rculfstack.h

index 5bb06b1a366142e1552ea6be01e691a07009862f..b44b9e29fdbad9b73024d653fb992218f221644b 100644 (file)
--- a/include/urcu/static/rculfstack.h
+++ b/include/urcu/static/rculfstack.h
@@ -69,7 +69,9 @@ int _cds_lfs_push_rcu(struct cds_lfs_stack_rcu *s,
                  * uatomic_cmpxchg() implicit memory barrier orders earlier
                  * stores to node before publication.
                  */
-               head = uatomic_cmpxchg(&s->head, old_head, node);
+               cmm_emit_legacy_smp_mb();
+               head = uatomic_cmpxchg_mo(&s->head, old_head, node,
+                                       CMM_SEQ_CST, CMM_SEQ_CST);
                 if (old_head == head)
                         break;
         }
@@ -94,7 +96,9 @@ _cds_lfs_pop_rcu(struct cds_lfs_stack_rcu *s)
                 if (head) {
                         struct cds_lfs_node_rcu *next = rcu_dereference(head->next);
  
-                       if (uatomic_cmpxchg(&s->head, head, next) == head) {
+                       if (uatomic_cmpxchg_mo(&s->head, head, next,
+                                               CMM_SEQ_CST, CMM_SEQ_CST) == head) {
+                               cmm_emit_legacy_smp_mb();
                                 return head;
                         } else {
                                 /* Concurrent modification. Retry. */
diff --git a/include/urcu/static/wfcqueue.h b/include/urcu/static/wfcqueue.h

index 8c4729c532150f65c79da8f9d39d39933a450ff6..26741ae81d241ed04f1917b68e7755847c90490b 100644 (file)
--- a/include/urcu/static/wfcqueue.h
+++ b/include/urcu/static/wfcqueue.h
@@ -77,6 +77,11 @@ static inline void _cds_wfcq_node_init(struct cds_wfcq_node *node)
         node->next = NULL;
  }
  
+static inline void _cds_wfcq_node_init_atomic(struct cds_wfcq_node *node)
+{
+       uatomic_store(&node->next, NULL, CMM_RELAXED);
+}
+
  /*
   * cds_wfcq_init: initialize wait-free queue (with lock). Pair with
   * cds_wfcq_destroy().
@@ -139,8 +144,8 @@ static inline bool _cds_wfcq_empty(cds_wfcq_head_ptr_t u_head,
          * common case to ensure that dequeuers do not frequently access
          * enqueuer's tail->p cache line.
          */
-       return CMM_LOAD_SHARED(head->node.next) == NULL
-               && CMM_LOAD_SHARED(tail->p) == &head->node;
+       return uatomic_load(&head->node.next, CMM_CONSUME) == NULL
+               && uatomic_load(&tail->p, CMM_CONSUME) == &head->node;
  }
  
  static inline void _cds_wfcq_dequeue_lock(struct cds_wfcq_head *head,
@@ -174,7 +179,7 @@ static inline bool ___cds_wfcq_append(cds_wfcq_head_ptr_t u_head,
          * stores to data structure containing node and setting
          * node->next to NULL before publication.
          */
-       old_tail = uatomic_xchg(&tail->p, new_tail);
+       old_tail = uatomic_xchg_mo(&tail->p, new_tail, CMM_SEQ_CST);
  
         /*
          * Implicit memory barrier after uatomic_xchg() orders store to
@@ -185,7 +190,8 @@ static inline bool ___cds_wfcq_append(cds_wfcq_head_ptr_t u_head,
          * store will append "node" to the queue from a dequeuer
          * perspective.
          */
-       CMM_STORE_SHARED(old_tail->next, new_head);
+       uatomic_store(&old_tail->next, new_head, CMM_RELEASE);
+
         /*
          * Return false if queue was empty prior to adding the node,
          * else return true.
@@ -196,8 +202,8 @@ static inline bool ___cds_wfcq_append(cds_wfcq_head_ptr_t u_head,
  /*
   * cds_wfcq_enqueue: enqueue a node into a wait-free queue.
   *
- * Issues a full memory barrier before enqueue. No mutual exclusion is
- * required.
+ * Operations prior to enqueue are consistent with respect to dequeuing or
+ * splicing and iterating.
   *
   * Returns false if the queue was empty prior to adding the node.
   * Returns true otherwise.
@@ -206,6 +212,8 @@ static inline bool _cds_wfcq_enqueue(cds_wfcq_head_ptr_t head,
                 struct cds_wfcq_tail *tail,
                 struct cds_wfcq_node *new_tail)
  {
+       cmm_emit_legacy_smp_mb();
+
         return ___cds_wfcq_append(head, tail, new_tail, new_tail);
  }
  
@@ -256,8 +264,10 @@ ___cds_wfcq_node_sync_next(struct cds_wfcq_node *node, int blocking)
  
         /*
          * Adaptative busy-looping waiting for enqueuer to complete enqueue.
+        *
+        * Load node.next before loading node's content
          */
-       while ((next = CMM_LOAD_SHARED(node->next)) == NULL) {
+       while ((next = uatomic_load(&node->next, CMM_CONSUME)) == NULL) {
                 if (___cds_wfcq_busy_wait(&attempt, blocking))
                         return CDS_WFCQ_WOULDBLOCK;
         }
@@ -276,8 +286,7 @@ ___cds_wfcq_first(cds_wfcq_head_ptr_t u_head,
         if (_cds_wfcq_empty(__cds_wfcq_head_cast(head), tail))
                 return NULL;
         node = ___cds_wfcq_node_sync_next(&head->node, blocking);
-       /* Load head->node.next before loading node's content */
-       cmm_smp_read_barrier_depends();
+
         return node;
  }
  
@@ -329,16 +338,15 @@ ___cds_wfcq_next(cds_wfcq_head_ptr_t head __attribute__((unused)),
          * out if we reached the end of the queue, we first check
          * node->next as a common case to ensure that iteration on nodes
          * do not frequently access enqueuer's tail->p cache line.
+        *
+        * Load node->next before loading next's content
          */
-       if ((next = CMM_LOAD_SHARED(node->next)) == NULL) {
-               /* Load node->next before tail->p */
-               cmm_smp_rmb();
-               if (CMM_LOAD_SHARED(tail->p) == node)
+       if ((next = uatomic_load(&node->next, CMM_CONSUME)) == NULL) {
+               if (uatomic_load(&tail->p, CMM_RELAXED) == node)
                         return NULL;
                 next = ___cds_wfcq_node_sync_next(node, blocking);
         }
-       /* Load node->next before loading next's content */
-       cmm_smp_read_barrier_depends();
+
         return next;
  }
  
@@ -400,7 +408,7 @@ ___cds_wfcq_dequeue_with_state(cds_wfcq_head_ptr_t u_head,
                 return CDS_WFCQ_WOULDBLOCK;
         }
  
-       if ((next = CMM_LOAD_SHARED(node->next)) == NULL) {
+       if ((next = uatomic_load(&node->next, CMM_CONSUME)) == NULL) {
                 /*
                  * @node is probably the only node in the queue.
                  * Try to move the tail to &q->head.
@@ -408,17 +416,13 @@ ___cds_wfcq_dequeue_with_state(cds_wfcq_head_ptr_t u_head,
                  * NULL if the cmpxchg succeeds. Should the
                  * cmpxchg fail due to a concurrent enqueue, the
                  * q->head.next will be set to the next node.
-                * The implicit memory barrier before
-                * uatomic_cmpxchg() orders load node->next
-                * before loading q->tail.
-                * The implicit memory barrier before uatomic_cmpxchg
-                * orders load q->head.next before loading node's
-                * content.
                  */
-               _cds_wfcq_node_init(&head->node);
-               if (uatomic_cmpxchg(&tail->p, node, &head->node) == node) {
+               _cds_wfcq_node_init_atomic(&head->node);
+               if (uatomic_cmpxchg_mo(&tail->p, node, &head->node,
+                                       CMM_SEQ_CST, CMM_SEQ_CST) == node) {
                         if (state)
                                 *state |= CDS_WFCQ_STATE_LAST;
+                       cmm_emit_legacy_smp_mb();
                         return node;
                 }
                 next = ___cds_wfcq_node_sync_next(node, blocking);
@@ -428,7 +432,7 @@ ___cds_wfcq_dequeue_with_state(cds_wfcq_head_ptr_t u_head,
                  * (currently NULL) back to its original value.
                  */
                 if (!blocking && next == CDS_WFCQ_WOULDBLOCK) {
-                       head->node.next = node;
+                       uatomic_store(&head->node.next, node, CMM_RELAXED);
                         return CDS_WFCQ_WOULDBLOCK;
                 }
         }
@@ -436,10 +440,9 @@ ___cds_wfcq_dequeue_with_state(cds_wfcq_head_ptr_t u_head,
         /*
          * Move queue head forward.
          */
-       head->node.next = next;
+       uatomic_store(&head->node.next, next, CMM_RELAXED);
+       cmm_emit_legacy_smp_mb();
  
-       /* Load q->head.next before loading node's content */
-       cmm_smp_read_barrier_depends();
         return node;
  }
  
@@ -501,6 +504,8 @@ ___cds_wfcq_dequeue_nonblocking(cds_wfcq_head_ptr_t head,
  /*
   * __cds_wfcq_splice: enqueue all src_q nodes at the end of dest_q.
   *
+ * Operations after splice are consistent with respect to enqueue.
+ *
   * Dequeue all nodes from src_q.
   * dest_q must be already initialized.
   * Mutual exclusion for src_q should be ensured by the caller as
@@ -534,10 +539,10 @@ ___cds_wfcq_splice(
                  * uatomic_xchg, as well as tail pointer vs head node
                  * address.
                  */
-               head = uatomic_xchg(&src_q_head->node.next, NULL);
+               head = uatomic_xchg_mo(&src_q_head->node.next, NULL, CMM_SEQ_CST);
                 if (head)
                         break;  /* non-empty */
-               if (CMM_LOAD_SHARED(src_q_tail->p) == &src_q_head->node)
+               if (uatomic_load(&src_q_tail->p, CMM_CONSUME) == &src_q_head->node)
                         return CDS_WFCQ_RET_SRC_EMPTY;
                 if (___cds_wfcq_busy_wait(&attempt, blocking))
                         return CDS_WFCQ_RET_WOULDBLOCK;
@@ -549,7 +554,8 @@ ___cds_wfcq_splice(
          * concurrent enqueue on src_q, which exchanges the tail before
          * updating the previous tail's next pointer.
          */
-       tail = uatomic_xchg(&src_q_tail->p, &src_q_head->node);
+       cmm_emit_legacy_smp_mb();
+       tail = uatomic_xchg_mo(&src_q_tail->p, &src_q_head->node, CMM_SEQ_CST);
  
         /*
          * Append the spliced content of src_q into dest_q. Does not
diff --git a/include/urcu/static/wfqueue.h b/include/urcu/static/wfqueue.h

index 731b43f71a3be5d65967d981511f2baf9a513fec..0cb7b1b98cd965557121af56910687c94c2ecdd8 100644 (file)
--- a/include/urcu/static/wfqueue.h
+++ b/include/urcu/static/wfqueue.h
@@ -67,13 +67,14 @@ static inline void _cds_wfq_enqueue(struct cds_wfq_queue *q,
          * structure containing node and setting node->next to NULL before
          * publication.
          */
-       old_tail = uatomic_xchg(&q->tail, &node->next);
+       cmm_emit_legacy_smp_mb();
+       old_tail = uatomic_xchg_mo(&q->tail, &node->next, CMM_SEQ_CST);
         /*
          * At this point, dequeuers see a NULL old_tail->next, which indicates
          * that the queue is being appended to. The following store will append
          * "node" to the queue from a dequeuer perspective.
          */
-       CMM_STORE_SHARED(*old_tail, node);
+       uatomic_store(old_tail, node, CMM_RELEASE);
  }
  
  /*
@@ -88,7 +89,7 @@ ___cds_wfq_node_sync_next(struct cds_wfq_node *node)
         /*
          * Adaptative busy-looping waiting for enqueuer to complete enqueue.
          */
-       while ((next = CMM_LOAD_SHARED(node->next)) == NULL) {
+       while ((next = uatomic_load(&node->next, CMM_CONSUME)) == NULL) {
                 if (++attempt >= WFQ_ADAPT_ATTEMPTS) {
                         (void) poll(NULL, 0, WFQ_WAIT); /* Wait for 10ms */
                         attempt = 0;
@@ -115,7 +116,7 @@ ___cds_wfq_dequeue_blocking(struct cds_wfq_queue *q)
         /*
          * Queue is empty if it only contains the dummy node.
          */
-       if (q->head == &q->dummy && CMM_LOAD_SHARED(q->tail) == &q->dummy.next)
+       if (q->head == &q->dummy && uatomic_load(&q->tail, CMM_CONSUME) == &q->dummy.next)
                 return NULL;
         node = q->head;
  
diff --git a/include/urcu/static/wfstack.h b/include/urcu/static/wfstack.h

index 8c5648ea3ede060ddc4c2d4ddfe244faf6ece5bc..c46e97d9f25197e0e701a55ef24b0975bd1b2aac 100644 (file)
--- a/include/urcu/static/wfstack.h
+++ b/include/urcu/static/wfstack.h
@@ -110,7 +110,7 @@ static inline bool _cds_wfs_empty(cds_wfs_stack_ptr_t u_stack)
  {
         struct __cds_wfs_stack *s = u_stack._s;
  
-       return ___cds_wfs_end(CMM_LOAD_SHARED(s->head));
+       return ___cds_wfs_end(uatomic_load(&s->head, CMM_RELAXED));
  }
  
  /*
@@ -119,6 +119,8 @@ static inline bool _cds_wfs_empty(cds_wfs_stack_ptr_t u_stack)
   * Issues a full memory barrier before push. No mutual exclusion is
   * required.
   *
+ * Operations before push are consistent when observed after associated pop.
+ *
   * Returns 0 if the stack was empty prior to adding the node.
   * Returns non-zero otherwise.
   */
@@ -134,12 +136,13 @@ int _cds_wfs_push(cds_wfs_stack_ptr_t u_stack, struct cds_wfs_node *node)
          * uatomic_xchg() implicit memory barrier orders earlier stores
          * to node (setting it to NULL) before publication.
          */
-       old_head = uatomic_xchg(&s->head, new_head);
+       cmm_emit_legacy_smp_mb();
+       old_head = uatomic_xchg_mo(&s->head, new_head, CMM_SEQ_CST);
         /*
          * At this point, dequeuers see a NULL node->next, they should
          * busy-wait until node->next is set to old_head.
          */
-       CMM_STORE_SHARED(node->next, &old_head->node);
+       uatomic_store(&node->next, &old_head->node, CMM_RELEASE);
         return !___cds_wfs_end(old_head);
  }
  
@@ -155,7 +158,7 @@ ___cds_wfs_node_sync_next(struct cds_wfs_node *node, int blocking)
         /*
          * Adaptative busy-looping waiting for push to complete.
          */
-       while ((next = CMM_LOAD_SHARED(node->next)) == NULL) {
+       while ((next = uatomic_load(&node->next, CMM_CONSUME)) == NULL) {
                 if (!blocking)
                         return CDS_WFS_WOULDBLOCK;
                 if (++attempt >= CDS_WFS_ADAPT_ATTEMPTS) {
@@ -180,7 +183,7 @@ ___cds_wfs_pop(cds_wfs_stack_ptr_t u_stack, int *state, int blocking)
         if (state)
                 *state = 0;
         for (;;) {
-               head = CMM_LOAD_SHARED(s->head);
+               head = uatomic_load(&s->head, CMM_CONSUME);
                 if (___cds_wfs_end(head)) {
                         return NULL;
                 }
@@ -189,9 +192,11 @@ ___cds_wfs_pop(cds_wfs_stack_ptr_t u_stack, int *state, int blocking)
                         return CDS_WFS_WOULDBLOCK;
                 }
                 new_head = caa_container_of(next, struct cds_wfs_head, node);
-               if (uatomic_cmpxchg(&s->head, head, new_head) == head) {
+               if (uatomic_cmpxchg_mo(&s->head, head, new_head,
+                                       CMM_SEQ_CST, CMM_SEQ_CST) == head) {
                         if (state && ___cds_wfs_end(new_head))
                                 *state |= CDS_WFS_STATE_LAST;
+                       cmm_emit_legacy_smp_mb();
                         return &head->node;
                 }
                 if (!blocking) {
@@ -206,6 +211,8 @@ ___cds_wfs_pop(cds_wfs_stack_ptr_t u_stack, int *state, int blocking)
   *
   * Returns NULL if stack is empty.
   *
+ * Operations after pop are consistent when observed before associated push.
+ *
   * __cds_wfs_pop_blocking needs to be synchronized using one of the
   * following techniques:
   *
@@ -264,6 +271,8 @@ ___cds_wfs_pop_nonblocking(cds_wfs_stack_ptr_t u_stack)
  /*
   * __cds_wfs_pop_all: pop all nodes from a stack.
   *
+ * Operations after pop_all are consistent when observed before associated push.
+ *
   * __cds_wfs_pop_all does not require any synchronization with other
   * push, nor with other __cds_wfs_pop_all, but requires synchronization
   * matching the technique used to synchronize __cds_wfs_pop_blocking:
@@ -295,7 +304,8 @@ ___cds_wfs_pop_all(cds_wfs_stack_ptr_t u_stack)
          * taking care to order writes to each node prior to the full
          * memory barrier after this uatomic_xchg().
          */
-       head = uatomic_xchg(&s->head, CDS_WFS_END);
+       head = uatomic_xchg_mo(&s->head, CDS_WFS_END, CMM_SEQ_CST);
+       cmm_emit_legacy_smp_mb();
         if (___cds_wfs_end(head))
                 return NULL;
         return head;
author	Olivier Dion <odion@efficios.com>
	Mon, 29 May 2023 15:21:11 +0000 (11:21 -0400)
committer	Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
	Mon, 14 Aug 2023 19:46:29 +0000 (15:46 -0400)
configure.ac		patch \| blob \| blame \| history
include/urcu/arch.h		patch \| blob \| blame \| history
include/urcu/config.h.in		patch \| blob \| blame \| history
include/urcu/static/lfstack.h		patch \| blob \| blame \| history
include/urcu/static/rculfqueue.h		patch \| blob \| blame \| history
include/urcu/static/rculfstack.h		patch \| blob \| blame \| history
include/urcu/static/wfcqueue.h		patch \| blob \| blame \| history
include/urcu/static/wfqueue.h		patch \| blob \| blame \| history
include/urcu/static/wfstack.h		patch \| blob \| blame \| history