Fix: ring buffer: RING_BUFFER_FLUSH ioctl buffer corruption
authorMathieu Desnoyers <mathieu.desnoyers@efficios.com>
Sun, 30 Jun 2013 19:37:46 +0000 (15:37 -0400)
committerMathieu Desnoyers <mathieu.desnoyers@efficios.com>
Sun, 30 Jun 2013 21:50:31 +0000 (17:50 -0400)
lib_ring_buffer_switch_slow() clearly states:

 * Note, however, that as a v_cmpxchg is used for some atomic
 * operations, this function must be called from the CPU which owns the
 * buffer for a ACTIVE flush.

But unfortunately, the RING_BUFFER_FLUSH ioctl does not follow these
important directives. Therefore, whenever the consumer daemon or session
daemon explicitly triggers a "flush" on a buffer, it can race with data
being written to the buffer, leading to corruption of the reserve/commit
counters, and therefore corruption of data in the buffer. It triggers
these warnings for overwrite mode buffers:

[65356.890016] WARNING: at
/home/compudj/git/lttng-modules/wrapper/ringbuffer/../../lib/ringbuffer/../../wrapper/ringbuffer/../../lib/ringbuffer/backend.h:110 lttng_event_write+0x118/0x140 [lttng_ring_buffer_client_mmap_overwrite]()

Which indicates that we are trying to write into a sub-buffer for which
we don't have exclusive access. It also causes the following warnings to
show up:

[65335.896208] ring buffer relay-overwrite-mmap, cpu 5: records were lost. Caused by:
[65335.896208]   [ 0 buffer full, 80910 nest buffer wrap-around, 0 event too big ]

Which is caused by corrupted commit counter.

Fix this by sending an IPI to the CPU owning the flushed buffer for
per-cpu synchronization. For global synchronization, no IPI is needed,
since we allow writes from remote CPUs.

Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
lib/ringbuffer/frontend_internal.h
lib/ringbuffer/ring_buffer_frontend.c
lib/ringbuffer/ring_buffer_vfs.c

index a86abb14e683825029e138660462bcc6f02d2e66..bc284333f64c505e6e3fca646248073a8927f306 100644 (file)
@@ -156,6 +156,9 @@ extern
 void lib_ring_buffer_switch_slow(struct lib_ring_buffer *buf,
                                 enum switch_mode mode);
 
+extern
+void lib_ring_buffer_switch_remote(struct lib_ring_buffer *buf);
+
 /* Buffer write helpers */
 
 static inline
index bff920d16a0f6a7a40d3eb2325bb9612a5639273..92aa78f57ddf2ed95f42c16e23728d32f58291b6 100644 (file)
@@ -1516,6 +1516,48 @@ void lib_ring_buffer_switch_slow(struct lib_ring_buffer *buf, enum switch_mode m
 }
 EXPORT_SYMBOL_GPL(lib_ring_buffer_switch_slow);
 
+static void remote_switch(void *info)
+{
+       struct lib_ring_buffer *buf = info;
+
+       lib_ring_buffer_switch_slow(buf, SWITCH_ACTIVE);
+}
+
+void lib_ring_buffer_switch_remote(struct lib_ring_buffer *buf)
+{
+       struct channel *chan = buf->backend.chan;
+       const struct lib_ring_buffer_config *config = &chan->backend.config;
+       int ret;
+
+       /*
+        * With global synchronization we don't need to use the IPI scheme.
+        */
+       if (config->sync == RING_BUFFER_SYNC_GLOBAL) {
+               lib_ring_buffer_switch_slow(buf, SWITCH_ACTIVE);
+               return;
+       }
+
+       /*
+        * Taking lock on CPU hotplug to ensure two things: first, that the
+        * target cpu is not taken concurrently offline while we are within
+        * smp_call_function_single() (I don't trust that get_cpu() on the
+        * _local_ CPU actually inhibit CPU hotplug for the _remote_ CPU (to be
+        * confirmed)). Secondly, if it happens that the CPU is not online, our
+        * own call to lib_ring_buffer_switch_slow() needs to be protected from
+        * CPU hotplug handlers, which can also perform a remote subbuffer
+        * switch.
+        */
+       get_online_cpus();
+       ret = smp_call_function_single(buf->backend.cpu,
+                                remote_switch, buf, 1);
+       if (ret) {
+               /* Remote CPU is offline, do it ourself. */
+               lib_ring_buffer_switch_slow(buf, SWITCH_ACTIVE);
+       }
+       put_online_cpus();
+}
+EXPORT_SYMBOL_GPL(lib_ring_buffer_switch_remote);
+
 /*
  * Returns :
  * 0 if ok
index 899af8124f05c10f296b7ea167383a399fdb1e28..a7c0e695beecfb96769b95464784d0767f2de423 100644 (file)
@@ -261,7 +261,7 @@ long lib_ring_buffer_ioctl(struct file *filp, unsigned int cmd, unsigned long ar
                                 arg);
        }
        case RING_BUFFER_FLUSH:
-               lib_ring_buffer_switch_slow(buf, SWITCH_ACTIVE);
+               lib_ring_buffer_switch_remote(buf);
                return 0;
        default:
                return -ENOIOCTLCMD;
@@ -374,7 +374,7 @@ long lib_ring_buffer_compat_ioctl(struct file *filp, unsigned int cmd,
                return compat_put_ulong(read_offset, arg);
        }
        case RING_BUFFER_COMPAT_FLUSH:
-               lib_ring_buffer_switch_slow(buf, SWITCH_ACTIVE);
+               lib_ring_buffer_switch_remote(buf);
                return 0;
        default:
                return -ENOIOCTLCMD;
This page took 0.02856 seconds and 4 git commands to generate.