libust/buffers.h

   1 /*
   2  * buffers.h
   3  * LTTng userspace tracer buffering system
   4  *
   5  * Copyright (C) 2009 - Pierre-Marc Fournier (pierre-marc dot fournier at polymtl dot ca)
   6  * Copyright (C) 2008 - Mathieu Desnoyers (mathieu.desnoyers@polymtl.ca)
   7  *
   8  * This library is free software; you can redistribute it and/or
   9  * modify it under the terms of the GNU Lesser General Public
  10  * License as published by the Free Software Foundation; either
  11  * version 2.1 of the License, or (at your option) any later version.
  12  *
  13  * This library is distributed in the hope that it will be useful,
  14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  16  * Lesser General Public License for more details.
  17  *
  18  * You should have received a copy of the GNU Lesser General Public
  19  * License along with this library; if not, write to the Free Software
  20  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301 USA
  21  */
  22
  23 #ifndef _UST_BUFFERS_H
  24 #define _UST_BUFFERS_H
  25
  #include <assert.h>
  #include <errno.h>
  #include <limits.h>

  #include <ust/core.h>

  #include "usterr.h"
  #include "channels.h"
  #include "tracerconst.h"
  #include "tracercore.h"
  #include "header-inline.h"
  35
  36 /***** FIXME: SHOULD BE REMOVED ***** */
  37
  /*
   * Offset arithmetic helpers. "offset" is a free-running position and "chan"
   * must expose alloc_size, subbuf_size and subbuf_size_order. The (size - 1)
   * masks only isolate the intended bits when alloc_size and subbuf_size are
   * powers of two.
   * SUBBUF_ALIGN and SUBBUF_INDEX expand "chan" more than once: do not pass
   * an expression with side effects.
   */

  /*
   * BUFFER_TRUNC zeroes the subbuffer offset and the subbuffer number parts of
   * the offset, which leaves only the buffer number.
   */
  #define BUFFER_TRUNC(offset, chan) \
          ((offset) & ~((chan)->alloc_size - 1))

  /* Position of the offset inside the buffer (bits below alloc_size). */
  #define BUFFER_OFFSET(offset, chan) \
          ((offset) & ((chan)->alloc_size - 1))

  /* Position of the offset inside its sub-buffer (bits below subbuf_size). */
  #define SUBBUF_OFFSET(offset, chan) \
          ((offset) & ((chan)->subbuf_size - 1))

  /*
   * Start of the sub-buffer following the one containing the offset. A full
   * subbuf_size is added before truncating, so an already aligned offset
   * also moves forward by one sub-buffer.
   */
  #define SUBBUF_ALIGN(offset, chan) \
          (((offset) + (chan)->subbuf_size) & ~((chan)->subbuf_size - 1))

  /* Start of the sub-buffer containing the offset. */
  #define SUBBUF_TRUNC(offset, chan) \
          ((offset) & ~((chan)->subbuf_size - 1))

  /* Index, within the buffer, of the sub-buffer containing the offset. */
  #define SUBBUF_INDEX(offset, chan) \
          (BUFFER_OFFSET(offset, chan) >> (chan)->subbuf_size_order)

  /*
   * Tracks changes to rchan/rchan_buf structs
   */
  #define UST_CHANNEL_VERSION 8
  57
  58 /**************************************/
  59
  /*
   * Commit accounting for one sub-buffer. Both members are only accessed
   * through atomic operations.
   */
  struct commit_counters {
          /* ATOMIC - grows by the slot size each time a slot is committed */
          long cc;
          /* ATOMIC - Incremented _once_ at sb switch */
          long cc_sb;
  };
  64
  65 struct ust_buffer {
  66         /* First 32 bytes cache-hot cacheline */
  67         long offset;                    /* Current offset in the buffer *atomic* */
  68         struct commit_counters *commit_count;   /* Commit count per sub-buffer */
  69         long consumed;                  /* Current offset in the buffer *atomic* access (shared) */
  70         unsigned long last_tsc;         /*
  71                                          * Last timestamp written in the buffer.
  72                                          */
  73         /* End of first 32 bytes cacheline */
  74         long active_readers;    /* ATOMIC - Active readers count standard atomic access (shared) */
  75         long events_lost;       /* ATOMIC */
  76         long corrupted_subbuffers; /* *ATOMIC* */
  77         /* one byte is written to this pipe when data is available, in order
  78            to wake the consumer */
  79         /* portability: Single byte writes must be as quick as possible. The kernel-side
  80            buffer must be large enough so the writer doesn't block. From the pipe(7)
  81            man page: Since linux 2.6.11, the pipe capacity is 65536 bytes. */
  82         int data_ready_fd_write;
  83         /* the reading end of the pipe */
  84         int data_ready_fd_read;
  85         /*
  86          * List of buffers with an open pipe, used for fork and forced subbuffer
  87          * switch.
  88          */
  89         struct list_head open_buffers_list;
  90
  91         unsigned int finalized;
  92 //ust// struct timer_list switch_timer; /* timer for periodical switch */
  93         unsigned long switch_timer_interval; /* 0 = unset */
  94
  95         struct ust_channel *chan;
  96
  97         struct kref kref;
  98         void *buf_data;
  99         size_t buf_size;
 100         int shmid;
 101         unsigned int cpu;
 102
 103         /* commit count per subbuffer; must be at end of struct */
 104         long commit_seq[0]; /* ATOMIC */
 105 } ____cacheline_aligned;
 106
 /*
  * Reason for a forced sub-buffer switch. A switch is done either during
  * tracing or as a final flush after tracing (in which case nothing is
  * written in the new sub-buffer).
  * NOTE(review): presumably FORCE_ACTIVE is the "during tracing" case and
  * FORCE_FLUSH the "final flush" case; confirm against the slow-path code.
  */
 enum force_switch_mode {
         FORCE_ACTIVE,
         FORCE_FLUSH
 };
 113
 114 extern int ltt_reserve_slot_lockless_slow(struct ust_channel *chan,
 115                 struct ust_trace *trace, size_t data_size,
 116                 int largest_align, int cpu,
 117                 struct ust_buffer **ret_buf,
 118                 size_t *slot_size, long *buf_offset,
 119                 u64 *tsc, unsigned int *rflags);
 120
 121 extern void ltt_force_switch_lockless_slow(struct ust_buffer *buf,
 122                 enum force_switch_mode mode);
 123
 124
 /*
  * ust_buffers_do_copy - copy len bytes from src to dest.
  * @dest: destination (no alignment requirement)
  * @src: source (no alignment requirement)
  * @len: number of bytes to copy; 0 is a no-op
  *
  * The common 1, 2, 4 and 8 byte sizes are given their own case so that
  * memcpy() is called with a compile-time constant length, which compilers
  * typically expand to a plain load/store. Unlike a store through a cast
  * integer pointer, this is well defined for misaligned addresses and does
  * not break strict aliasing. The regions must not overlap.
  */
 static __inline__ void ust_buffers_do_copy(void *dest, const void *src, size_t len)
 {
         switch (len) {
         case 0:
                 break;
         case 1:
                 memcpy(dest, src, 1);
                 break;
         case 2:
                 memcpy(dest, src, 2);
                 break;
         case 4:
                 memcpy(dest, src, 4);
                 break;
         case 8:
                 memcpy(dest, src, 8);
                 break;
         default:
                 memcpy(dest, src, len);
                 break;
         }
 }
 149
 150 static __inline__ void *ust_buffers_offset_address(struct ust_buffer *buf, size_t offset)
 151 {
 152         return ((char *)buf->buf_data)+offset;
 153 }
 154
 155 /*
 156  * Last TSC comparison functions. Check if the current TSC overflows
 157  * LTT_TSC_BITS bits from the last TSC read. Reads and writes last_tsc
 158  * atomically.
 159  */
 160
 161 /* FIXME: does this test work properly? */
 162 #if (BITS_PER_LONG == 32)
 163 static __inline__ void save_last_tsc(struct ust_buffer *ltt_buf,
 164                                         u64 tsc)
 165 {
 166         ltt_buf->last_tsc = (unsigned long)(tsc >> LTT_TSC_BITS);
 167 }
 168
 169 static __inline__ int last_tsc_overflow(struct ust_buffer *ltt_buf,
 170                                         u64 tsc)
 171 {
 172         unsigned long tsc_shifted = (unsigned long)(tsc >> LTT_TSC_BITS);
 173
 174         if (unlikely((tsc_shifted - ltt_buf->last_tsc)))
 175                 return 1;
 176         else
 177                 return 0;
 178 }
 179 #else
 180 static __inline__ void save_last_tsc(struct ust_buffer *ltt_buf,
 181                                         u64 tsc)
 182 {
 183         ltt_buf->last_tsc = (unsigned long)tsc;
 184 }
 185
 186 static __inline__ int last_tsc_overflow(struct ust_buffer *ltt_buf,
 187                                         u64 tsc)
 188 {
 189         if (unlikely((tsc - ltt_buf->last_tsc) >> LTT_TSC_BITS))
 190                 return 1;
 191         else
 192                 return 0;
 193 }
 194 #endif
 195
 196 static __inline__ void ltt_reserve_push_reader(
 197                 struct ust_channel *rchan,
 198                 struct ust_buffer *buf,
 199                 long offset)
 200 {
 201         long consumed_old, consumed_new;
 202
 203         do {
 204                 consumed_old = uatomic_read(&buf->consumed);
 205                 /*
 206                  * If buffer is in overwrite mode, push the reader consumed
 207                  * count if the write position has reached it and we are not
 208                  * at the first iteration (don't push the reader farther than
 209                  * the writer). This operation can be done concurrently by many
 210                  * writers in the same buffer, the writer being at the farthest
 211                  * write position sub-buffer index in the buffer being the one
 212                  * which will win this loop.
 213                  * If the buffer is not in overwrite mode, pushing the reader
 214                  * only happens if a sub-buffer is corrupted.
 215                  */
 216                 if (unlikely((SUBBUF_TRUNC(offset, buf->chan)
 217                    - SUBBUF_TRUNC(consumed_old, buf->chan))
 218                    >= rchan->alloc_size))
 219                         consumed_new = SUBBUF_ALIGN(consumed_old, buf->chan);
 220                 else
 221                         return;
 222         } while (unlikely(uatomic_cmpxchg(&buf->consumed, consumed_old,
 223                         consumed_new) != consumed_old));
 224 }
 225
 226 static __inline__ void ltt_vmcore_check_deliver(
 227                 struct ust_buffer *buf,
 228                 long commit_count, long idx)
 229 {
 230         uatomic_set(&buf->commit_seq[idx], commit_count);
 231 }
 232
 233 static __inline__ void ltt_check_deliver(struct ust_channel *chan,
 234                 struct ust_buffer *buf,
 235                 long offset, long commit_count, long idx)
 236 {
 237         long old_commit_count = commit_count - chan->subbuf_size;
 238
 239         /* Check if all commits have been done */
 240         if (unlikely((BUFFER_TRUNC(offset, chan)
 241                         >> chan->n_subbufs_order)
 242                         - (old_commit_count
 243                            & chan->commit_count_mask) == 0)) {
 244                 /*
 245                  * If we succeeded in updating the cc_sb, we are delivering
 246                  * the subbuffer. Deals with concurrent updates of the "cc"
 247                  * value without adding a add_return atomic operation to the
 248                  * fast path.
 249                  */
 250                 if (likely(uatomic_cmpxchg(&buf->commit_count[idx].cc_sb,
 251                                          old_commit_count, commit_count)
 252                            == old_commit_count)) {
 253                         int result;
 254
 255                         /*
 256                          * Set noref flag for this subbuffer.
 257                          */
 258 //ust//                 ltt_set_noref_flag(rchan, buf, idx);
 259                         ltt_vmcore_check_deliver(buf, commit_count, idx);
 260
 261                         /* wakeup consumer */
 262                         result = write(buf->data_ready_fd_write, "1", 1);
 263                         if(result == -1) {
 264                                 PERROR("write (in ltt_relay_buffer_flush)");
 265                                 ERR("this should never happen!");
 266                         }
 267                 }
 268         }
 269 }
 270
 271 static __inline__ int ltt_poll_deliver(struct ust_channel *chan, struct ust_buffer *buf)
 272 {
 273         long consumed_old, consumed_idx, commit_count, write_offset;
 274
 275         consumed_old = uatomic_read(&buf->consumed);
 276         consumed_idx = SUBBUF_INDEX(consumed_old, buf->chan);
 277         commit_count = uatomic_read(&buf->commit_count[consumed_idx].cc_sb);
 278         /*
 279          * No memory barrier here, since we are only interested
 280          * in a statistically correct polling result. The next poll will
 281          * get the data is we are racing. The mb() that ensures correct
 282          * memory order is in get_subbuf.
 283          */
 284         write_offset = uatomic_read(&buf->offset);
 285
 286         /*
 287          * Check that the subbuffer we are trying to consume has been
 288          * already fully committed.
 289          */
 290
 291         if (((commit_count - chan->subbuf_size)
 292              & chan->commit_count_mask)
 293             - (BUFFER_TRUNC(consumed_old, buf->chan)
 294                >> chan->n_subbufs_order)
 295             != 0)
 296                 return 0;
 297
 298         /*
 299          * Check that we are not about to read the same subbuffer in
 300          * which the writer head is.
 301          */
 302         if ((SUBBUF_TRUNC(write_offset, buf->chan)
 303            - SUBBUF_TRUNC(consumed_old, buf->chan))
 304            == 0)
 305                 return 0;
 306
 307         return 1;
 308
 309 }
 310
 311 /*
 312  * returns 0 if reserve ok, or 1 if the slow path must be taken.
 313  */
 314 static __inline__ int ltt_relay_try_reserve(
 315                 struct ust_channel *chan,
 316                 struct ust_buffer *buf,
 317                 size_t data_size,
 318                 u64 *tsc, unsigned int *rflags, int largest_align,
 319                 long *o_begin, long *o_end, long *o_old,
 320                 size_t *before_hdr_pad, size_t *size)
 321 {
 322         *o_begin = uatomic_read(&buf->offset);
 323         *o_old = *o_begin;
 324
 325         *tsc = trace_clock_read64();
 326
 327 //ust// #ifdef CONFIG_LTT_VMCORE
 328 //ust//         prefetch(&buf->commit_count[SUBBUF_INDEX(*o_begin, rchan)]);
 329 //ust//         prefetch(&buf->commit_seq[SUBBUF_INDEX(*o_begin, rchan)]);
 330 //ust// #else
 331 //ust//         prefetchw(&buf->commit_count[SUBBUF_INDEX(*o_begin, rchan)]);
 332 //ust// #endif
 333         if (last_tsc_overflow(buf, *tsc))
 334                 *rflags = LTT_RFLAG_ID_SIZE_TSC;
 335
 336         if (unlikely(SUBBUF_OFFSET(*o_begin, buf->chan) == 0))
 337                 return 1;
 338
 339         *size = ust_get_header_size(chan,
 340                                 *o_begin, data_size,
 341                                 before_hdr_pad, *rflags);
 342         *size += ltt_align(*o_begin + *size, largest_align) + data_size;
 343         if (unlikely((SUBBUF_OFFSET(*o_begin, buf->chan) + *size)
 344                      > buf->chan->subbuf_size))
 345                 return 1;
 346
 347         /*
 348          * Event fits in the current buffer and we are not on a switch
 349          * boundary. It's safe to write.
 350          */
 351         *o_end = *o_begin + *size;
 352
 353         if (unlikely((SUBBUF_OFFSET(*o_end, buf->chan)) == 0))
 354                 /*
 355                  * The offset_end will fall at the very beginning of the next
 356                  * subbuffer.
 357                  */
 358                 return 1;
 359
 360         return 0;
 361 }
 362
 363 static __inline__ int ltt_reserve_slot(struct ust_channel *chan,
 364                                        struct ust_trace *trace, size_t data_size,
 365                                        int largest_align, int cpu,
 366                                        struct ust_buffer **ret_buf,
 367                                        size_t *slot_size, long *buf_offset, u64 *tsc,
 368                                        unsigned int *rflags)
 369 {
 370         struct ust_buffer *buf = *ret_buf = chan->buf[cpu];
 371         long o_begin, o_end, o_old;
 372         size_t before_hdr_pad;
 373
 374         /*
 375          * Perform retryable operations.
 376          */
 377         /* FIXME: make this really per cpu? */
 378         if (unlikely(LOAD_SHARED(ltt_nesting) > 4)) {
 379                 DBG("Dropping event because nesting is too deep.");
 380                 uatomic_inc(&buf->events_lost);
 381                 return -EPERM;
 382         }
 383
 384         if (unlikely(ltt_relay_try_reserve(chan, buf,
 385                         data_size, tsc, rflags,
 386                         largest_align, &o_begin, &o_end, &o_old,
 387                         &before_hdr_pad, slot_size)))
 388                 goto slow_path;
 389
 390         if (unlikely(uatomic_cmpxchg(&buf->offset, o_old, o_end) != o_old))
 391                 goto slow_path;
 392
 393         /*
 394          * Atomically update last_tsc. This update races against concurrent
 395          * atomic updates, but the race will always cause supplementary full TSC
 396          * events, never the opposite (missing a full TSC event when it would be
 397          * needed).
 398          */
 399         save_last_tsc(buf, *tsc);
 400
 401         /*
 402          * Push the reader if necessary
 403          */
 404         ltt_reserve_push_reader(chan, buf, o_end - 1);
 405
 406         /*
 407          * Clear noref flag for this subbuffer.
 408          */
 409 //ust// ltt_clear_noref_flag(chan, buf, SUBBUF_INDEX(o_end - 1, chan));
 410
 411         *buf_offset = o_begin + before_hdr_pad;
 412         return 0;
 413 slow_path:
 414         return ltt_reserve_slot_lockless_slow(chan, trace, data_size,
 415                                               largest_align, cpu, ret_buf,
 416                                               slot_size, buf_offset, tsc,
 417                                               rflags);
 418 }
 419
 420 /*
 421  * Force a sub-buffer switch for a per-cpu buffer. This operation is
 422  * completely reentrant : can be called while tracing is active with
 423  * absolutely no lock held.
 424  */
 425 static __inline__ void ltt_force_switch(struct ust_buffer *buf,
 426                 enum force_switch_mode mode)
 427 {
 428         return ltt_force_switch_lockless_slow(buf, mode);
 429 }
 430
 431 /*
 432  * for flight recording. must be called after relay_commit.
 433  * This function increments the subbuffers's commit_seq counter each time the
 434  * commit count reaches back the reserve offset (module subbuffer size). It is
 435  * useful for crash dump.
 436  */
 437 //ust// #ifdef CONFIG_LTT_VMCORE
 438 static __inline__ void ltt_write_commit_counter(struct ust_channel *chan,
 439                 struct ust_buffer *buf, long idx, long buf_offset,
 440                 long commit_count, size_t data_size)
 441 {
 442         long offset;
 443         long commit_seq_old;
 444
 445         offset = buf_offset + data_size;
 446
 447         /*
 448          * SUBBUF_OFFSET includes commit_count_mask. We can simply
 449          * compare the offsets within the subbuffer without caring about
 450          * buffer full/empty mismatch because offset is never zero here
 451          * (subbuffer header and event headers have non-zero length).
 452          */
 453         if (unlikely(SUBBUF_OFFSET(offset - commit_count, buf->chan)))
 454                 return;
 455
 456         commit_seq_old = uatomic_read(&buf->commit_seq[idx]);
 457         while (commit_seq_old < commit_count)
 458                 commit_seq_old = uatomic_cmpxchg(&buf->commit_seq[idx],
 459                                          commit_seq_old, commit_count);
 460
 461         DBG("commit_seq for channel %s_%d, subbuf %ld is now %ld", buf->chan->channel_name, buf->cpu, idx, commit_count);
 462 }
 463 //ust// #else
 464 //ust// static __inline__ void ltt_write_commit_counter(struct ust_buffer *buf,
 465 //ust//                 long idx, long buf_offset, long commit_count, size_t data_size)
 466 //ust// {
 467 //ust// }
 468 //ust// #endif
 469
 470 /*
 471  * Atomic unordered slot commit. Increments the commit count in the
 472  * specified sub-buffer, and delivers it if necessary.
 473  *
 474  * Parameters:
 475  *
 476  * @ltt_channel : channel structure
 477  * @transport_data: transport-specific data
 478  * @buf_offset : offset following the event header.
 479  * @data_size : size of the event data.
 480  * @slot_size : size of the reserved slot.
 481  */
 482 static __inline__ void ltt_commit_slot(
 483                 struct ust_channel *chan,
 484                 struct ust_buffer *buf, long buf_offset,
 485                 size_t data_size, size_t slot_size)
 486 {
 487         long offset_end = buf_offset;
 488         long endidx = SUBBUF_INDEX(offset_end - 1, chan);
 489         long commit_count;
 490
 491         smp_wmb();
 492
 493         uatomic_add(&buf->commit_count[endidx].cc, slot_size);
 494         /*
 495          * commit count read can race with concurrent OOO commit count updates.
 496          * This is only needed for ltt_check_deliver (for non-polling delivery
 497          * only) and for ltt_write_commit_counter. The race can only cause the
 498          * counter to be read with the same value more than once, which could
 499          * cause :
 500          * - Multiple delivery for the same sub-buffer (which is handled
 501          *   gracefully by the reader code) if the value is for a full
 502          *   sub-buffer. It's important that we can never miss a sub-buffer
 503          *   delivery. Re-reading the value after the uatomic_add ensures this.
 504          * - Reading a commit_count with a higher value that what was actually
 505          *   added to it for the ltt_write_commit_counter call (again caused by
 506          *   a concurrent committer). It does not matter, because this function
 507          *   is interested in the fact that the commit count reaches back the
 508          *   reserve offset for a specific sub-buffer, which is completely
 509          *   independent of the order.
 510          */
 511         commit_count = uatomic_read(&buf->commit_count[endidx].cc);
 512
 513         ltt_check_deliver(chan, buf, offset_end - 1, commit_count, endidx);
 514         /*
 515          * Update data_size for each commit. It's needed only for extracting
 516          * ltt buffers from vmcore, after crash.
 517          */
 518         ltt_write_commit_counter(chan, buf, endidx, buf_offset, commit_count, data_size);
 519 }
 520
 /*
  * Out-of-line helper called by ust_buffers_strncpy() when fewer than len
  * bytes were copied or when the copied string is not NUL-terminated.
  * NOTE(review): the exact fixup performed is implemented elsewhere.
  */
 void _ust_buffers_strncpy_fixup(struct ust_buffer *buf,
                                 size_t offset, size_t len,
                                 size_t copied, int terminated);
 523
 524 static __inline__ int ust_buffers_write(struct ust_buffer *buf, size_t offset,
 525         const void *src, size_t len)
 526 {
 527         size_t buf_offset = BUFFER_OFFSET(offset, buf->chan);
 528
 529         assert(buf_offset < buf->chan->subbuf_size*buf->chan->subbuf_cnt);
 530         assert(buf_offset + len
 531                <= buf->chan->subbuf_size*buf->chan->subbuf_cnt);
 532
 533         ust_buffers_do_copy(buf->buf_data + buf_offset, src, len);
 534
 535         return len;
 536 }
 537
 /*
  * ust_buffers_do_memset - write character into dest.
  * @dest: destination
  * @src: source character
  * @len: length to write; 0 writes nothing
  */
 static __inline__
 void ust_buffers_do_memset(void *dest, char src, size_t len)
 {
         /*
          * Byte cursor over dest: incrementing a void * directly is a GNU
          * extension, not standard C.
          */
         unsigned char *out = dest;

         /*
          * What we really want here is an __inline__ memset, but we
          * don't have constants, so gcc generally uses a function call.
          */
         for (; len > 0; len--)
                 *out++ = (unsigned char)src;
 }
 554
 /*
  * ust_buffers_do_strncpy - copy a string up to a certain number of bytes
  * @dest: destination
  * @src: source; may be modified concurrently, each byte is read once
  * @len: max. length to copy
  * @terminated: output string ends with \0 (output)
  *
  * returns the number of bytes copied, counting the terminating \0 when one
  * was copied. Does not finalize with \0 if len is reached.
  */
 static __inline__
 size_t ust_buffers_do_strncpy(void *dest, const void *src, size_t len,
                               int *terminated)
 {
         /*
          * Byte cursors: incrementing a void * directly is a GNU extension,
          * not standard C.
          */
         unsigned char *out = dest;
         const unsigned char *in = src;
         size_t orig_len = len;

         *terminated = 0;
         /*
          * What we really want here is an __inline__ strncpy, but we
          * don't have constants, so gcc generally uses a function call.
          */
         for (; len > 0; len--) {
                 *out = LOAD_SHARED(*in);
                 /* Check with dest, because src may be modified concurrently */
                 if (*out == '\0') {
                         /* The terminator counts as a copied byte. */
                         len--;
                         *terminated = 1;
                         break;
                 }
                 out++;
                 in++;
         }
         return orig_len - len;
 }
 589
 590 static __inline__
 591 int ust_buffers_strncpy(struct ust_buffer *buf, size_t offset, const void *src,
 592                         size_t len)
 593 {
 594         size_t buf_offset = BUFFER_OFFSET(offset, buf->chan);
 595         ssize_t copied;
 596         int terminated;
 597
 598         assert(buf_offset < buf->chan->subbuf_size*buf->chan->subbuf_cnt);
 599         assert(buf_offset + len
 600                <= buf->chan->subbuf_size*buf->chan->subbuf_cnt);
 601
 602         copied = ust_buffers_do_strncpy(buf->buf_data + buf_offset,
 603                                         src, len, &terminated);
 604         if (unlikely(copied < len || !terminated))
 605                 _ust_buffers_strncpy_fixup(buf, offset, len, copied,
 606                                            terminated);
 607         return len;
 608 }
 609
 /*
  * Sub-buffer get/put entry points, implemented outside this header.
  * NOTE(review): presumably used by the consumer to take and release a
  * sub-buffer; confirm against the implementation.
  */
 extern int ust_buffers_get_subbuf(struct ust_buffer *buf,
                                   long *consumed);
 extern int ust_buffers_put_subbuf(struct ust_buffer *buf,
                                   unsigned long uconsumed_old);

 /* Implemented outside this header. */
 extern void init_ustrelay_transport(void);
 614
 615 #endif /* _UST_BUFFERS_H */