/*
 * LTTng userspace tracer buffering system
 *
 * Copyright (C) 2009 - Pierre-Marc Fournier (pierre-marc dot fournier at polymtl dot ca)
 * Copyright (C) 2008 - Mathieu Desnoyers (mathieu.desnoyers@polymtl.ca)
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
#ifndef _UST_BUFFERS_H
#define _UST_BUFFERS_H

#include <ust/clock.h>

#include "usterr_signal_safe.h"
#include "tracerconst.h"
#include "tracercore.h"
/*
 * BUFFER_TRUNC zeroes the subbuffer offset and the subbuffer number parts of
 * the offset, which leaves only the buffer number.
 */
#define BUFFER_TRUNC(offset, chan) \
	((offset) & (~((chan)->alloc_size - 1)))
#define BUFFER_OFFSET(offset, chan) ((offset) & ((chan)->alloc_size - 1))
#define SUBBUF_OFFSET(offset, chan) ((offset) & ((chan)->subbuf_size - 1))
#define SUBBUF_ALIGN(offset, chan) \
	(((offset) + (chan)->subbuf_size) & (~((chan)->subbuf_size - 1)))
#define SUBBUF_TRUNC(offset, chan) \
	((offset) & (~((chan)->subbuf_size - 1)))
#define SUBBUF_INDEX(offset, chan) \
	(BUFFER_OFFSET((offset), chan) >> (chan)->subbuf_size_order)
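
/*
 * Worked example (illustration only; the channel values are assumed for the
 * sake of the example): with subbuf_size = 4096 (subbuf_size_order = 12),
 * 4 sub-buffers and therefore alloc_size = 16384, a linear write offset of
 * 20617 decomposes as:
 *
 *	BUFFER_TRUNC(20617, chan)  == 16384  (buffer number part only)
 *	BUFFER_OFFSET(20617, chan) ==  4233  (position within the buffer)
 *	SUBBUF_INDEX(20617, chan)  ==     1  (second sub-buffer)
 *	SUBBUF_OFFSET(20617, chan) ==   137  (position within that sub-buffer)
 *	SUBBUF_TRUNC(20617, chan)  == 20480  (start of the current sub-buffer)
 *	SUBBUF_ALIGN(20617, chan)  == 24576  (start of the next sub-buffer)
 */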
/*
 * Tracks changes to rchan/rchan_buf structs
 */
#define UST_CHANNEL_VERSION 8

/**************************************/
/*
 * TODO: using the "long" type for struct ust_buffer (control structure
 * shared between traced apps and the consumer) is a very bad idea when
 * we get to systems with mixed 32/64-bit processes.
 *
 * But on a 64-bit system, we want the full power of 64-bit counters,
 * which wrap less often. Therefore, it's not as easy as "use 32-bit
 * types everywhere".
 *
 * One way to deal with this is to:
 * 1) Design the 64-bit consumer so it can detect 32-bit and 64-bit apps.
 * 2) The 32-bit consumer only supports 32-bit apps.
 */
struct commit_counters {
	long cc;			/* ATOMIC */
	long cc_sb;			/* ATOMIC - Incremented _once_ at sb switch */
};
struct ust_buffer {
	/* First 32 bytes cache-hot cacheline */
	long offset;			/* Current offset in the buffer *atomic* */
	struct commit_counters *commit_count;	/* Commit count per sub-buffer */
	long consumed;			/* Current offset in the buffer *atomic* access (shared) */
	unsigned long last_tsc;		/*
					 * Last timestamp written in the buffer.
					 */
	/* End of first 32 bytes cacheline */
	long active_readers;		/* ATOMIC - Active readers count standard atomic access (shared) */
	long events_lost;		/* ATOMIC */
	long corrupted_subbuffers;	/* *ATOMIC* */
	/* one byte is written to this pipe when data is available, in order
	   to wake the consumer */
	/* portability: single byte writes must be as quick as possible. The
	   kernel-side buffer must be large enough so the writer doesn't block.
	   From the pipe(7) man page: since Linux 2.6.11, the pipe capacity is
	   65536 bytes. */
	int data_ready_fd_write;
	/* the reading end of the pipe */
	int data_ready_fd_read;

	/*
	 * List of buffers with an open pipe, used for fork and forced subbuffer
	 * switch.
	 */
	struct cds_list_head open_buffers_list;

	unsigned int finalized;
//ust//	struct timer_list switch_timer;	/* timer for periodical switch */
	unsigned long switch_timer_interval;	/* 0 = unset */

	struct ust_channel *chan;

	struct urcu_ref urcu_ref;
	void *buf_data;
	unsigned int cpu;

	/* commit count per subbuffer; must be at end of struct */
	long commit_seq[0];		/* ATOMIC */
} ____cacheline_aligned;
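
/*
 * Illustrative sketch (not part of this header): how a consumer thread could
 * block until the writer signals data availability through the data_ready
 * pipe described above. The single byte written by the tracer is drained,
 * then the sub-buffer can be checked for delivery. Only standard POSIX calls
 * are used; the surrounding consumer loop is assumed.
 *
 *	#include <poll.h>
 *	#include <unistd.h>
 *
 *	static void wait_for_data(struct ust_buffer *buf)
 *	{
 *		struct pollfd pfd = {
 *			.fd = buf->data_ready_fd_read,
 *			.events = POLLIN,
 *		};
 *		char dummy;
 *
 *		if (poll(&pfd, 1, -1) > 0 && (pfd.revents & POLLIN))
 *			(void) read(buf->data_ready_fd_read, &dummy, 1);
 *	}
 */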
/*
 * A switch is either done during tracing (FORCE_ACTIVE) or as a final
 * flush after tracing (FORCE_FLUSH). FORCE_FLUSH ensures we won't
 * write in the new sub-buffer.
 */
enum force_switch_mode { FORCE_ACTIVE, FORCE_FLUSH };
extern int ltt_reserve_slot_lockless_slow(struct ust_channel *chan,
		struct ust_trace *trace, size_t data_size,
		int largest_align, int cpu,
		struct ust_buffer **ret_buf,
		size_t *slot_size, long *buf_offset,
		u64 *tsc, unsigned int *rflags);

extern void ltt_force_switch_lockless_slow(struct ust_buffer *buf,
		enum force_switch_mode mode);
#ifndef HAVE_EFFICIENT_UNALIGNED_ACCESS

/*
 * Calculate the offset needed to align the type.
 * size_of_type must be non-zero.
 */
static inline unsigned int ltt_align(size_t align_drift, size_t size_of_type)
{
	size_t alignment = min(sizeof(void *), size_of_type);
	return (alignment - align_drift) & (alignment - 1);
}

/* Default arch alignment */
#define LTT_ALIGN

static inline int ltt_get_alignment(void)
{
	return sizeof(void *);
}

#else /* HAVE_EFFICIENT_UNALIGNED_ACCESS */

static inline unsigned int ltt_align(size_t align_drift,
		size_t size_of_type)
{
	return 0;
}

#define LTT_ALIGN __attribute__((packed))

static inline int ltt_get_alignment(void)
{
	return 0;
}

#endif /* HAVE_EFFICIENT_UNALIGNED_ACCESS */
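
/*
 * Worked example (illustration only): ltt_align() returns the padding needed
 * to bring the current write position to the natural alignment of the next
 * type, capped at the architecture word size. On a 64-bit build without
 * HAVE_EFFICIENT_UNALIGNED_ACCESS:
 *
 *	ltt_align(13, sizeof(u32)) == 3	(13 -> 16, 4-byte alignment)
 *	ltt_align(13, sizeof(u64)) == 3	(13 -> 16, 8-byte alignment)
 *	ltt_align(16, sizeof(u64)) == 0	(already aligned)
 *	ltt_align(17, sizeof(u8))  == 0	(1-byte types never need padding)
 *
 * When HAVE_EFFICIENT_UNALIGNED_ACCESS is defined, the function always
 * returns 0 and event fields are packed instead.
 */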
static __inline__ void ust_buffers_do_copy(void *dest, const void *src, size_t len)
{
	union {
		const void *src;
		const u8 *src8;
		const u16 *src16;
		const u32 *src32;
		const u64 *src64;
	} u = { .src = src };

	switch (len) {
	case 0: break;
	case 1: *(u8 *)dest = *u.src8;
		break;
	case 2: *(u16 *)dest = *u.src16;
		break;
	case 4: *(u32 *)dest = *u.src32;
		break;
	case 8: *(u64 *)dest = *u.src64;
		break;
	default:
		memcpy(dest, src, len);
	}
}
static __inline__ void *ust_buffers_offset_address(struct ust_buffer *buf, size_t offset)
{
	return ((char *)buf->buf_data) + offset;
}
/*
 * Last TSC comparison functions. Check if the current TSC overflows
 * LTT_TSC_BITS bits from the last TSC read. Reads and writes last_tsc
 * atomically.
 */

/* FIXME: does this test work properly? */
#if (BITS_PER_LONG == 32)
static __inline__ void save_last_tsc(struct ust_buffer *ltt_buf,
					u64 tsc)
{
	ltt_buf->last_tsc = (unsigned long)(tsc >> LTT_TSC_BITS);
}

static __inline__ int last_tsc_overflow(struct ust_buffer *ltt_buf,
					u64 tsc)
{
	unsigned long tsc_shifted = (unsigned long)(tsc >> LTT_TSC_BITS);

	if (unlikely((tsc_shifted - ltt_buf->last_tsc)))
		return 1;
	else
		return 0;
}
#else
static __inline__ void save_last_tsc(struct ust_buffer *ltt_buf,
					u64 tsc)
{
	ltt_buf->last_tsc = (unsigned long)tsc;
}

static __inline__ int last_tsc_overflow(struct ust_buffer *ltt_buf,
					u64 tsc)
{
	if (unlikely((tsc - ltt_buf->last_tsc) >> LTT_TSC_BITS))
		return 1;
	else
		return 0;
}
#endif
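
/*
 * Worked example (illustration only; the actual LTT_TSC_BITS value comes from
 * the tracer headers). On a 64-bit build, last_tsc_overflow() reports an
 * overflow whenever the distance to the previously recorded timestamp no
 * longer fits in LTT_TSC_BITS bits, which forces a full 64-bit TSC to be
 * written in the event header. Assuming LTT_TSC_BITS == 27 for the example:
 *
 *	last_tsc = 1000000
 *	tsc = 1000000 + (1UL << 27) - 1  ->  (delta >> 27) == 0, no overflow
 *	tsc = 1000000 + (1UL << 27)      ->  (delta >> 27) == 1, overflow,
 *	                                     rflags gets LTT_RFLAG_ID_SIZE_TSC
 */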
/*
 * ust_get_header_size
 *
 * Calculate alignment offset to 32-bits. This is the alignment offset of the
 * event header.
 *
 * Important note:
 * The event header must be 32-bits. The total offset calculated here:
 *
 * Alignment of header struct on 32 bits (min arch size, header size)
 * + sizeof(header struct)  (32-bits)
 * + (opt) u16 (ext. event id)
 * + (opt) u16 (event_size) (if event_size == 0xFFFFUL, has ext. event size)
 * + (opt) u32 (ext. event size)
 * + (opt) u64 full TSC (aligned on min(64-bits, arch size))
 *
 * The payload must itself determine its own alignment from the biggest type it
 * contains.
 */
static __inline__ unsigned char ust_get_header_size(
		struct ust_channel *channel,
		size_t offset,
		size_t data_size,
		size_t *before_hdr_pad,
		unsigned int rflags)
{
	size_t orig_offset = offset;
	size_t padding;

	padding = ltt_align(offset, sizeof(struct ltt_event_header));
	offset += padding;
	offset += sizeof(struct ltt_event_header);

	if (unlikely(rflags)) {
		switch (rflags) {
		case LTT_RFLAG_ID_SIZE_TSC:
			offset += sizeof(u16) + sizeof(u16);
			if (data_size >= 0xFFFFU)
				offset += sizeof(u32);
			offset += ltt_align(offset, sizeof(u64));
			offset += sizeof(u64);
			break;
		case LTT_RFLAG_ID_SIZE:
			offset += sizeof(u16) + sizeof(u16);
			if (data_size >= 0xFFFFU)
				offset += sizeof(u32);
			break;
		case LTT_RFLAG_ID:
			offset += sizeof(u16);
			break;
		}
	}

	*before_hdr_pad = padding;
	return offset - orig_offset;
}
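
/*
 * Worked example (illustration only, assuming sizeof(struct ltt_event_header)
 * == 4 as implied by the "must be 32-bits" note above): for an event reserved
 * at offset 18 with rflags == 0, the header needs 2 bytes of padding to reach
 * a 4-byte boundary plus 4 bytes of header, so ust_get_header_size() returns 6
 * and *before_hdr_pad is set to 2. With rflags == LTT_RFLAG_ID_SIZE and
 * data_size < 0xFFFFU, two extra u16 fields are appended and the result
 * becomes 10.
 */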
static __inline__ void ltt_reserve_push_reader(
		struct ust_channel *rchan,
		struct ust_buffer *buf,
		long offset)
{
	long consumed_old, consumed_new;

	do {
		consumed_old = uatomic_read(&buf->consumed);
		/*
		 * If buffer is in overwrite mode, push the reader consumed
		 * count if the write position has reached it and we are not
		 * at the first iteration (don't push the reader farther than
		 * the writer). This operation can be done concurrently by many
		 * writers in the same buffer, the writer being at the farthest
		 * write position sub-buffer index in the buffer being the one
		 * which will win this loop.
		 * If the buffer is not in overwrite mode, pushing the reader
		 * only happens if a sub-buffer is corrupted.
		 */
		if (unlikely((SUBBUF_TRUNC(offset, buf->chan)
			      - SUBBUF_TRUNC(consumed_old, buf->chan))
			     >= rchan->alloc_size))
			consumed_new = SUBBUF_ALIGN(consumed_old, buf->chan);
		else
			return;
	} while (unlikely(uatomic_cmpxchg(&buf->consumed, consumed_old,
			consumed_new) != consumed_old));
}
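
/*
 * Worked example (illustration only): with subbuf_size = 4096 and
 * alloc_size = 16384 (4 sub-buffers), a writer calling
 * ltt_reserve_push_reader() with offset = 16500 while consumed_old = 100 sees
 * SUBBUF_TRUNC(16500) - SUBBUF_TRUNC(100) = 16384 - 0 >= alloc_size, so the
 * reader is pushed to SUBBUF_ALIGN(100) = 4096, i.e. one full sub-buffer
 * ahead, freeing the sub-buffer the writer is about to overwrite. With
 * consumed_old = 4200 the difference is only 12288 and the reader is left
 * alone.
 */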
static __inline__ void ltt_vmcore_check_deliver(
		struct ust_buffer *buf,
		long commit_count, long idx)
{
	uatomic_set(&buf->commit_seq[idx], commit_count);
}
static __inline__ void ltt_check_deliver(struct ust_channel *chan,
		struct ust_buffer *buf,
		long offset, long commit_count, long idx)
{
	long old_commit_count = commit_count - chan->subbuf_size;

	/* Check if all commits have been done */
	if (unlikely((BUFFER_TRUNC(offset, chan)
			>> chan->n_subbufs_order)
			- (old_commit_count
			   & chan->commit_count_mask) == 0)) {
		/*
		 * If we succeeded in updating the cc_sb, we are delivering
		 * the subbuffer. Deals with concurrent updates of the "cc"
		 * value without adding a add_return atomic operation to the
		 * fast path.
		 */
		if (likely(uatomic_cmpxchg(&buf->commit_count[idx].cc_sb,
					old_commit_count, commit_count)
			   == old_commit_count)) {
			int result;

			/*
			 * Set noref flag for this subbuffer.
			 */
//ust//			ltt_set_noref_flag(rchan, buf, idx);
			ltt_vmcore_check_deliver(buf, commit_count, idx);

			/* wakeup consumer */
			result = write(buf->data_ready_fd_write, "1", 1);
			if (result < 0) {
				PERROR("write (in ltt_relay_buffer_flush)");
				ERR("this should never happen!");
			}
		}
	}
}
static __inline__ int ltt_poll_deliver(struct ust_channel *chan, struct ust_buffer *buf)
{
	long consumed_old, consumed_idx, commit_count, write_offset;

	consumed_old = uatomic_read(&buf->consumed);
	consumed_idx = SUBBUF_INDEX(consumed_old, buf->chan);
	commit_count = uatomic_read(&buf->commit_count[consumed_idx].cc_sb);
	/*
	 * No memory cmm_barrier here, since we are only interested
	 * in a statistically correct polling result. The next poll will
	 * get the data if we are racing. The mb() that ensures correct
	 * memory order is in get_subbuf.
	 */
	write_offset = uatomic_read(&buf->offset);

	/*
	 * Check that the subbuffer we are trying to consume has been
	 * already fully committed.
	 */
	if (((commit_count - chan->subbuf_size)
	     & chan->commit_count_mask)
	    - (BUFFER_TRUNC(consumed_old, buf->chan)
	       >> chan->n_subbufs_order)
	    != 0)
		return 0;

	/*
	 * Check that we are not about to read the same subbuffer in
	 * which the writer head is.
	 */
	if ((SUBBUF_TRUNC(write_offset, buf->chan)
	     - SUBBUF_TRUNC(consumed_old, buf->chan))
	    == 0)
		return 0;

	return 1;
}
/*
 * returns 0 if reserve ok, or 1 if the slow path must be taken.
 */
static __inline__ int ltt_relay_try_reserve(
		struct ust_channel *chan,
		struct ust_buffer *buf,
		size_t data_size,
		u64 *tsc, unsigned int *rflags, int largest_align,
		long *o_begin, long *o_end, long *o_old,
		size_t *before_hdr_pad, size_t *size)
{
	*o_begin = uatomic_read(&buf->offset);
	*o_old = *o_begin;

	*tsc = trace_clock_read64();

	if (last_tsc_overflow(buf, *tsc))
		*rflags = LTT_RFLAG_ID_SIZE_TSC;

	if (unlikely(SUBBUF_OFFSET(*o_begin, buf->chan) == 0))
		return 1;

	*size = ust_get_header_size(chan,
			*o_begin, data_size,
			before_hdr_pad, *rflags);
	*size += ltt_align(*o_begin + *size, largest_align) + data_size;
	if (unlikely((SUBBUF_OFFSET(*o_begin, buf->chan) + *size)
		     > buf->chan->subbuf_size))
		return 1;

	/*
	 * Event fits in the current buffer and we are not on a switch
	 * boundary. It's safe to write.
	 */
	*o_end = *o_begin + *size;

	if (unlikely((SUBBUF_OFFSET(*o_end, buf->chan)) == 0))
		/*
		 * The offset_end will fall at the very beginning of the next
		 * subbuffer.
		 */
		return 1;

	return 0;
}
static __inline__ int ltt_reserve_slot(struct ust_channel *chan,
		struct ust_trace *trace, size_t data_size,
		int largest_align, int cpu,
		struct ust_buffer **ret_buf,
		size_t *slot_size, long *buf_offset, u64 *tsc,
		unsigned int *rflags)
{
	struct ust_buffer *buf = *ret_buf = chan->buf[cpu];
	long o_begin, o_end, o_old;
	size_t before_hdr_pad;

	/*
	 * Perform retryable operations.
	 */
	if (unlikely(CMM_LOAD_SHARED(ltt_nesting) > 4)) {
		DBG("Dropping event because nesting is too deep.");
		uatomic_inc(&buf->events_lost);
		return -EPERM;
	}

	if (unlikely(ltt_relay_try_reserve(chan, buf,
			data_size, tsc, rflags,
			largest_align, &o_begin, &o_end, &o_old,
			&before_hdr_pad, slot_size)))
		goto slow_path;

	if (unlikely(uatomic_cmpxchg(&buf->offset, o_old, o_end) != o_old))
		goto slow_path;

	/*
	 * Atomically update last_tsc. This update races against concurrent
	 * atomic updates, but the race will always cause supplementary full TSC
	 * events, never the opposite (missing a full TSC event when it would be
	 * needed).
	 */
	save_last_tsc(buf, *tsc);

	/*
	 * Push the reader if necessary
	 */
	ltt_reserve_push_reader(chan, buf, o_end - 1);

	*buf_offset = o_begin + before_hdr_pad;
	return 0;

slow_path:
	return ltt_reserve_slot_lockless_slow(chan, trace, data_size,
			largest_align, cpu, ret_buf,
			slot_size, buf_offset, tsc, rflags);
}
/*
 * Force a sub-buffer switch for a per-cpu buffer. This operation is
 * completely reentrant: it can be called while tracing is active with
 * absolutely no lock held.
 */
static __inline__ void ltt_force_switch(struct ust_buffer *buf,
		enum force_switch_mode mode)
{
	return ltt_force_switch_lockless_slow(buf, mode);
}
/*
 * Commit count bookkeeping for flight recording; must be called after
 * relay_commit.
 * This function increments the subbuffer's commit_seq counter each time the
 * commit count reaches back the reserve offset (modulo subbuffer size). It is
 * useful for crash dumps.
 */
static __inline__ void ltt_write_commit_counter(struct ust_channel *chan,
		struct ust_buffer *buf, long idx, long buf_offset,
		long commit_count, size_t data_size)
{
	long offset;
	long commit_seq_old;

	offset = buf_offset + data_size;

	/*
	 * SUBBUF_OFFSET includes commit_count_mask. We can simply
	 * compare the offsets within the subbuffer without caring about
	 * buffer full/empty mismatch because offset is never zero here
	 * (subbuffer header and event headers have non-zero length).
	 */
	if (unlikely(SUBBUF_OFFSET(offset - commit_count, buf->chan)))
		return;

	commit_seq_old = uatomic_read(&buf->commit_seq[idx]);
	while (commit_seq_old < commit_count)
		commit_seq_old = uatomic_cmpxchg(&buf->commit_seq[idx],
					commit_seq_old, commit_count);

	DBG("commit_seq for channel %s_%d, subbuf %ld is now %ld",
	    buf->chan->channel_name, buf->cpu, idx, commit_count);
}
/*
 * Atomic unordered slot commit. Increments the commit count in the
 * specified sub-buffer, and delivers it if necessary.
 *
 * Parameters:
 *
 * @chan : channel structure
 * @buf : buffer to commit into
 * @buf_offset : offset following the event header.
 * @data_size : size of the event data.
 * @slot_size : size of the reserved slot.
 */
static __inline__ void ltt_commit_slot(
		struct ust_channel *chan,
		struct ust_buffer *buf, long buf_offset,
		size_t data_size, size_t slot_size)
{
	long offset_end = buf_offset;
	long endidx = SUBBUF_INDEX(offset_end - 1, chan);
	long commit_count;

	uatomic_add(&buf->commit_count[endidx].cc, slot_size);
	/*
	 * commit count read can race with concurrent OOO commit count updates.
	 * This is only needed for ltt_check_deliver (for non-polling delivery
	 * only) and for ltt_write_commit_counter. The race can only cause the
	 * counter to be read with the same value more than once, which could
	 * cause:
	 * - Multiple delivery for the same sub-buffer (which is handled
	 *   gracefully by the reader code) if the value is for a full
	 *   sub-buffer. It's important that we can never miss a sub-buffer
	 *   delivery. Re-reading the value after the uatomic_add ensures this.
	 * - Reading a commit_count with a higher value than what was actually
	 *   added to it for the ltt_write_commit_counter call (again caused by
	 *   a concurrent committer). It does not matter, because this function
	 *   is interested in the fact that the commit count reaches back the
	 *   reserve offset for a specific sub-buffer, which is completely
	 *   independent of the order.
	 */
	commit_count = uatomic_read(&buf->commit_count[endidx].cc);

	ltt_check_deliver(chan, buf, offset_end - 1, commit_count, endidx);
	/*
	 * Update data_size for each commit. It's needed only for extracting
	 * ltt buffers from vmcore, after crash.
	 */
	ltt_write_commit_counter(chan, buf, endidx, buf_offset, commit_count, data_size);
}
void _ust_buffers_strncpy_fixup(struct ust_buffer *buf, size_t offset,
	size_t len, size_t copied, int terminated);

static __inline__ int ust_buffers_write(struct ust_buffer *buf, size_t offset,
	const void *src, size_t len)
{
	size_t buf_offset = BUFFER_OFFSET(offset, buf->chan);

	assert(buf_offset < buf->chan->subbuf_size*buf->chan->subbuf_cnt);
	assert(buf_offset + len
	       <= buf->chan->subbuf_size*buf->chan->subbuf_cnt);

	ust_buffers_do_copy(buf->buf_data + buf_offset, src, len);

	return len;
}
/*
 * ust_buffers_do_memset - write character into dest.
 * @dest: destination
 * @src: source character
 * @len: length to write
 */
static __inline__
void ust_buffers_do_memset(void *dest, char src, size_t len)
{
	/*
	 * What we really want here is an __inline__ memset, but we
	 * don't have constants, so gcc generally uses a function call.
	 */
	for (; len > 0; len--)
		*(u8 *)dest++ = src;
}
/*
 * ust_buffers_do_strncpy - copy a string up to a certain number of bytes
 * @dest: destination
 * @src: source
 * @len: max. length to copy
 * @terminated: output string ends with \0 (output)
 *
 * Returns the number of bytes copied. Does not finalize with \0 if len is
 * reached.
 */
static __inline__
size_t ust_buffers_do_strncpy(void *dest, const void *src, size_t len,
			      int *terminated)
{
	size_t orig_len = len;

	*terminated = 0;
	/*
	 * What we really want here is an __inline__ strncpy, but we
	 * don't have constants, so gcc generally uses a function call.
	 */
	for (; len > 0; len--) {
		*(u8 *)dest = CMM_LOAD_SHARED(*(const u8 *)src);
		/* Check with dest, because src may be modified concurrently */
		if (*(const u8 *)dest == '\0') {
			len--;
			*terminated = 1;
			break;
		}
		dest++;
		src++;
	}
	return orig_len - len;
}
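
/*
 * Worked example (illustration only): copying the string "ab" (2 characters
 * plus the terminating \0) with len = 8 copies 3 bytes, sets *terminated to 1
 * and returns 3; the remaining 5 bytes of the slot are untouched, which is
 * why the caller below falls back to _ust_buffers_strncpy_fixup() whenever
 * the return value is smaller than len. With len = 2 only "ab" is copied, no
 * \0 is written, *terminated stays 0 and the fixup path is also taken.
 */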
static __inline__
int ust_buffers_strncpy(struct ust_buffer *buf, size_t offset, const void *src,
		size_t len)
{
	size_t buf_offset = BUFFER_OFFSET(offset, buf->chan);
	size_t copied;
	int terminated;

	assert(buf_offset < buf->chan->subbuf_size*buf->chan->subbuf_cnt);
	assert(buf_offset + len
	       <= buf->chan->subbuf_size*buf->chan->subbuf_cnt);

	copied = ust_buffers_do_strncpy(buf->buf_data + buf_offset,
					src, len, &terminated);
	if (unlikely(copied < len || !terminated))
		_ust_buffers_strncpy_fixup(buf, offset, len, copied,
					   terminated);
	return len;
}
extern int ust_buffers_get_subbuf(struct ust_buffer *buf, long *consumed);
extern int ust_buffers_put_subbuf(struct ust_buffer *buf, unsigned long uconsumed_old);

extern void init_ustrelay_transport(void);

#endif /* _UST_BUFFERS_H */