libust/buffers.c

   1 /*
   2  * buffers.c
   3  * LTTng userspace tracer buffering system
   4  *
   5  * Copyright (C) 2009 - Pierre-Marc Fournier (pierre-marc dot fournier at polymtl dot ca)
   6  * Copyright (C) 2008 - Mathieu Desnoyers (mathieu.desnoyers@polymtl.ca)
   7  *
   8  * This library is free software; you can redistribute it and/or
   9  * modify it under the terms of the GNU Lesser General Public
  10  * License as published by the Free Software Foundation; either
  11  * version 2.1 of the License, or (at your option) any later version.
  12  *
  13  * This library is distributed in the hope that it will be useful,
  14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  16  * Lesser General Public License for more details.
  17  *
  18  * You should have received a copy of the GNU Lesser General Public
  19  * License along with this library; if not, write to the Free Software
  20  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301 USA
  21  */
  22
  23 #include <unistd.h>
  24 #include <sys/mman.h>
  25 #include <sys/ipc.h>
  26 #include <sys/shm.h>
  27 #include <fcntl.h>
  28 #include <ust/kernelcompat.h>
  29 #include <kcompat/kref.h>
  30 #include <stdlib.h>
  31 #include "buffers.h"
  32 #include "channels.h"
  33 #include "tracer.h"
  34 #include "tracercore.h"
  35 #include "usterr.h"
  36
  37 struct ltt_reserve_switch_offsets {
  38         long begin, end, old;
  39         long begin_switch, end_switch_current, end_switch_old;
  40         size_t before_hdr_pad, size;
  41 };
  42
  43
/*
 * Held by ust_buffers_channel_open() and ust_buffers_channel_close() while
 * they open/close per-cpu buffers and modify the channel list below.
 */
static DEFINE_MUTEX(ust_buffers_channels_mutex);
/* Open channels, linked through their 'list' member. */
static LIST_HEAD(ust_buffers_channels);
  46
  47 static int get_n_cpus(void)
  48 {
  49         int result;
  50         static int n_cpus = 0;
  51
  52         if(n_cpus) {
  53                 return n_cpus;
  54         }
  55
  56         /* On Linux, when some processors are offline
  57          * _SC_NPROCESSORS_CONF counts the offline
  58          * processors, whereas _SC_NPROCESSORS_ONLN
  59          * does not. If we used _SC_NPROCESSORS_ONLN,
  60          * getcpu() could return a value greater than
  61          * this sysconf, in which case the arrays
  62          * indexed by processor would overflow.
  63          */
  64         result = sysconf(_SC_NPROCESSORS_CONF);
  65         if(result == -1) {
  66                 return -1;
  67         }
  68
  69         n_cpus = result;
  70
  71         return result;
  72 }
  73
  74 /* _ust_buffers_write()
  75  *
  76  * @buf: destination buffer
  77  * @offset: offset in destination
  78  * @src: source buffer
  79  * @len: length of source
  80  * @cpy: already copied
  81  */
  82
  83 void _ust_buffers_write(struct ust_buffer *buf, size_t offset,
  84         const void *src, size_t len, ssize_t cpy)
  85 {
  86         do {
  87                 len -= cpy;
  88                 src += cpy;
  89                 offset += cpy;
  90
  91                 WARN_ON(offset >= buf->buf_size);
  92
  93                 cpy = min_t(size_t, len, buf->buf_size - offset);
  94                 ust_buffers_do_copy(buf->buf_data + offset, src, cpy);
  95         } while (unlikely(len != cpy));
  96 }
  97
/*
 * Forward declaration: ust_buffers_open_buf() calls this function before
 * its definition appears further down in the file.
 */
static int ust_buffers_init_buffer(struct ust_trace *trace,
                struct ust_channel *ltt_chan,
                struct ust_buffer *buf,
                unsigned int n_subbufs);
 102
 103 static int ust_buffers_alloc_buf(struct ust_buffer *buf, size_t *size)
 104 {
 105         void *ptr;
 106         int result;
 107
 108         *size = PAGE_ALIGN(*size);
 109
 110         result = buf->shmid = shmget(getpid(), *size, IPC_CREAT | IPC_EXCL | 0700);
 111         if(result == -1 && errno == EINVAL) {
 112                 ERR("shmget() returned EINVAL; maybe /proc/sys/kernel/shmmax should be increased.");
 113                 return -1;
 114         }
 115         else if(result == -1) {
 116                 PERROR("shmget");
 117                 return -1;
 118         }
 119
 120         /* FIXME: should have matching call to shmdt */
 121         ptr = shmat(buf->shmid, NULL, 0);
 122         if(ptr == (void *) -1) {
 123                 perror("shmat");
 124                 goto destroy_shmem;
 125         }
 126
 127         /* Already mark the shared memory for destruction. This will occur only
 128          * when all users have detached.
 129          */
 130         result = shmctl(buf->shmid, IPC_RMID, NULL);
 131         if(result == -1) {
 132                 perror("shmctl");
 133                 return -1;
 134         }
 135
 136         buf->buf_data = ptr;
 137         buf->buf_size = *size;
 138
 139         return 0;
 140
 141         destroy_shmem:
 142         result = shmctl(buf->shmid, IPC_RMID, NULL);
 143         if(result == -1) {
 144                 perror("shmctl");
 145         }
 146
 147         return -1;
 148 }
 149
 150 int ust_buffers_create_buf(struct ust_channel *channel, int cpu)
 151 {
 152         int result;
 153         struct ust_buffer *buf = channel->buf[cpu];
 154
 155         buf->cpu = cpu;
 156         result = ust_buffers_alloc_buf(buf, &channel->alloc_size);
 157         if(result)
 158                 return -1;
 159
 160         buf->chan = channel;
 161         kref_get(&channel->kref);
 162         return 0;
 163 }
 164
 165 static void ust_buffers_destroy_channel(struct kref *kref)
 166 {
 167         struct ust_channel *chan = container_of(kref, struct ust_channel, kref);
 168         free(chan);
 169 }
 170
 171 static void ust_buffers_destroy_buf(struct ust_buffer *buf)
 172 {
 173         struct ust_channel *chan = buf->chan;
 174         int result;
 175
 176         result = munmap(buf->buf_data, buf->buf_size);
 177         if(result == -1) {
 178                 PERROR("munmap");
 179         }
 180
 181 //ust// chan->buf[buf->cpu] = NULL;
 182         free(buf);
 183         kref_put(&chan->kref, ust_buffers_destroy_channel);
 184 }
 185
 186 /* called from kref_put */
 187 static void ust_buffers_remove_buf(struct kref *kref)
 188 {
 189         struct ust_buffer *buf = container_of(kref, struct ust_buffer, kref);
 190         ust_buffers_destroy_buf(buf);
 191 }
 192
 193 int ust_buffers_open_buf(struct ust_channel *chan, int cpu)
 194 {
 195         int result;
 196
 197         result = ust_buffers_create_buf(chan, cpu);
 198         if (result == -1)
 199                 return -1;
 200
 201         kref_init(&chan->buf[cpu]->kref);
 202
 203         result = ust_buffers_init_buffer(chan->trace, chan, chan->buf[cpu], chan->subbuf_cnt);
 204         if(result == -1)
 205                 return -1;
 206
 207         return 0;
 208
 209         /* FIXME: decrementally destroy on error? */
 210 }
 211
 212 /**
 213  *      ust_buffers_close_buf - close a channel buffer
 214  *      @buf: buffer
 215  */
 216 static void ust_buffers_close_buf(struct ust_buffer *buf)
 217 {
 218         kref_put(&buf->kref, ust_buffers_remove_buf);
 219 }
 220
 221 int ust_buffers_channel_open(struct ust_channel *chan, size_t subbuf_size, size_t subbuf_cnt)
 222 {
 223         int i;
 224         int result;
 225
 226         if(subbuf_size == 0 || subbuf_cnt == 0)
 227                 return -1;
 228
 229         /* Check that the subbuffer size is larger than a page. */
 230         WARN_ON_ONCE(subbuf_size < PAGE_SIZE);
 231
 232         /*
 233          * Make sure the number of subbuffers and subbuffer size are power of 2.
 234          */
 235         WARN_ON_ONCE(hweight32(subbuf_size) != 1);
 236         WARN_ON(hweight32(subbuf_cnt) != 1);
 237
 238         chan->version = UST_CHANNEL_VERSION;
 239         chan->subbuf_cnt = subbuf_cnt;
 240         chan->subbuf_size = subbuf_size;
 241         chan->subbuf_size_order = get_count_order(subbuf_size);
 242         chan->alloc_size = subbuf_size * subbuf_cnt;
 243
 244         kref_init(&chan->kref);
 245
 246         mutex_lock(&ust_buffers_channels_mutex);
 247         for(i=0; i<chan->n_cpus; i++) {
 248                 result = ust_buffers_open_buf(chan, i);
 249                 if (result == -1)
 250                         goto error;
 251         }
 252         list_add(&chan->list, &ust_buffers_channels);
 253         mutex_unlock(&ust_buffers_channels_mutex);
 254
 255         return 0;
 256
 257         /* Jump directly inside the loop to close the buffers that were already
 258          * opened. */
 259         for(; i>=0; i--) {
 260                 ust_buffers_close_buf(chan->buf[i]);
 261 error:
 262                 do {} while(0);
 263         }
 264
 265         kref_put(&chan->kref, ust_buffers_destroy_channel);
 266         mutex_unlock(&ust_buffers_channels_mutex);
 267         return -1;
 268 }
 269
 270 void ust_buffers_channel_close(struct ust_channel *chan)
 271 {
 272         int i;
 273         if(!chan)
 274                 return;
 275
 276         mutex_lock(&ust_buffers_channels_mutex);
 277         for(i=0; i<chan->n_cpus; i++) {
 278         /* FIXME: if we make it here, then all buffers were necessarily allocated. Moreover, we don't
 279          * initialize to NULL so we cannot use this check. Should we? */
 280 //ust//         if (chan->buf[i])
 281                         ust_buffers_close_buf(chan->buf[i]);
 282         }
 283
 284         list_del(&chan->list);
 285         kref_put(&chan->kref, ust_buffers_destroy_channel);
 286         mutex_unlock(&ust_buffers_channels_mutex);
 287 }
 288
 289 /*
 290  * -------
 291  */
 292
/*
 * Forward declarations of static helpers.
 * NOTE(review): ust_buffers_destroy_buffer() is defined further down;
 * the definition of ltt_force_switch() is presumably later in the file
 * as well — confirm.
 */
static void ust_buffers_destroy_buffer(struct ust_channel *ltt_chan, int cpu);

static void ltt_force_switch(struct ust_buffer *buf,
                enum force_switch_mode mode);
 297
 298 /*
 299  * Trace callbacks
 300  */
 301 static void ltt_buffer_begin(struct ust_buffer *buf,
 302                         u64 tsc, unsigned int subbuf_idx)
 303 {
 304         struct ust_channel *channel = buf->chan;
 305         struct ltt_subbuffer_header *header =
 306                 (struct ltt_subbuffer_header *)
 307                         ust_buffers_offset_address(buf,
 308                                 subbuf_idx * buf->chan->subbuf_size);
 309
 310         header->cycle_count_begin = tsc;
 311         header->data_size = 0xFFFFFFFF; /* for recognizing crashed buffers */
 312         header->sb_size = 0xFFFFFFFF; /* for recognizing crashed buffers */
 313         /* FIXME: add memory barrier? */
 314         ltt_write_trace_header(channel->trace, header);
 315 }
 316
 317 /*
 318  * offset is assumed to never be 0 here : never deliver a completely empty
 319  * subbuffer. The lost size is between 0 and subbuf_size-1.
 320  */
 321 static notrace void ltt_buffer_end(struct ust_buffer *buf,
 322                 u64 tsc, unsigned int offset, unsigned int subbuf_idx)
 323 {
 324         struct ltt_subbuffer_header *header =
 325                 (struct ltt_subbuffer_header *)
 326                         ust_buffers_offset_address(buf,
 327                                 subbuf_idx * buf->chan->subbuf_size);
 328         u32 data_size = SUBBUF_OFFSET(offset - 1, buf->chan) + 1;
 329
 330         header->data_size = data_size;
 331         header->sb_size = PAGE_ALIGN(data_size);
 332         header->cycle_count_end = tsc;
 333         header->events_lost = uatomic_read(&buf->events_lost);
 334         header->subbuf_corrupt = uatomic_read(&buf->corrupted_subbuffers);
 335         if(unlikely(header->events_lost > 0)) {
 336                 DBG("Some events (%d) were lost in %s_%d", header->events_lost, buf->chan->channel_name, buf->cpu);
 337         }
 338 }
 339
/*
 * Hook called by ust_buffers_put_subbuf() after a sub-buffer has been
 * handed back, to tell the client that the buffer is no longer full.
 * The body is empty, so the call currently has no effect.
 *
 * This function should not be called from NMI interrupt context
 */
static notrace void ltt_buf_unfull(struct ust_buffer *buf,
                unsigned int subbuf_idx,
                long offset)
{
}
 348
 349 /*
 350  * Promote compiler barrier to a smp_mb().
 351  * For the specific LTTng case, this IPI call should be removed if the
 352  * architecture does not reorder writes.  This should eventually be provided by
 353  * a separate architecture-specific infrastructure.
 354  */
 355 //ust// static void remote_mb(void *info)
 356 //ust// {
 357 //ust//         smp_mb();
 358 //ust// }
 359
/*
 * Try to get the next sub-buffer to consume from @buf.
 *
 * @buf:      buffer to read from
 * @consumed: on success, receives the current consumed offset, which
 *            identifies the sub-buffer that may now be read
 *
 * Returns 0 on success, or -EAGAIN when the sub-buffer at the consumed
 * offset is not fully committed yet or is the one the writer is
 * currently positioned in.
 */
int ust_buffers_get_subbuf(struct ust_buffer *buf, long *consumed)
{
        struct ust_channel *channel = buf->chan;
        long consumed_old, consumed_idx, commit_count, write_offset;
//ust// int retval;

        /* The commit count is sampled first; see the barrier below. */
        consumed_old = uatomic_read(&buf->consumed);
        consumed_idx = SUBBUF_INDEX(consumed_old, buf->chan);
        commit_count = uatomic_read(&buf->commit_count[consumed_idx].cc_sb);
        /*
         * Make sure we read the commit count before reading the buffer
         * data and the write offset. Correct consumed offset ordering
         * wrt commit count is ensured by the use of cmpxchg to update
         * the consumed offset.
         *
         * The remainder of this comment describes the IPI-based scheme
         * whose code is commented out below; only the local smp_rmb() is
         * active in this version.
         *
         * smp_call_function_single can fail if the remote CPU is offline,
         * this is OK because then there is no wmb to execute there.
         * If our thread is executing on the same CPU as the one the buffer
         * belongs to, we don't have to synchronize it at all. If we are
         * migrated, the scheduler will take care of the memory barriers.
         * Normally, smp_call_function_single() should ensure program order when
         * executing the remote function, which implies that it surrounds the
         * function execution with :
         * smp_mb()
         * send IPI
         * csd_lock_wait
         *                recv IPI
         *                smp_mb()
         *                exec. function
         *                smp_mb()
         *                csd unlock
         * smp_mb()
         *
         * However, smp_call_function_single() does not seem to clearly execute
         * such barriers. It depends on spinlock semantic to provide the barrier
         * before executing the IPI and, when busy-looping, csd_lock_wait only
         * executes smp_mb() when it has to wait for the other CPU.
         *
         * I don't trust this code. Therefore, let's add the smp_mb() sequence
         * required ourselves, even if duplicated. It has no performance impact
         * anyway.
         *
         * smp_mb() is needed because smp_rmb() and smp_wmb() only order read vs
         * read and write vs write. They do not ensure core synchronization. We
         * really have to ensure total order between the 3 barriers running on
         * the 2 CPUs.
         */
//ust// #ifdef LTT_NO_IPI_BARRIER
        /*
         * Local rmb to match the remote wmb to read the commit count before the
         * buffer data and the write offset.
         */
        smp_rmb();
//ust// #else
//ust//         if (raw_smp_processor_id() != buf->cpu) {
//ust//                 smp_mb();       /* Total order with IPI handler smp_mb() */
//ust//                 smp_call_function_single(buf->cpu, remote_mb, NULL, 1);
//ust//                 smp_mb();       /* Total order with IPI handler smp_mb() */
//ust//         }
//ust// #endif

        write_offset = uatomic_read(&buf->offset);
        /*
         * Check that the subbuffer we are trying to consume has been
         * already fully committed.
         */
        if (((commit_count - buf->chan->subbuf_size)
             & channel->commit_count_mask)
            - (BUFFER_TRUNC(consumed_old, buf->chan)
               >> channel->n_subbufs_order)
            != 0) {
                return -EAGAIN;
        }
        /*
         * Check that we are not about to read the same subbuffer in
         * which the writer head is.
         */
        if ((SUBBUF_TRUNC(write_offset, buf->chan)
           - SUBBUF_TRUNC(consumed_old, buf->chan))
           == 0) {
                return -EAGAIN;
        }

        /* FIXME: is this ok to disable the reading feature? */
//ust// retval = update_read_sb_index(buf, consumed_idx);
//ust// if (retval)
//ust//         return retval;

        /* Hand the consumed offset back to the caller. */
        *consumed = consumed_old;

        return 0;
}
 451
 452 int ust_buffers_put_subbuf(struct ust_buffer *buf, unsigned long uconsumed_old)
 453 {
 454         long consumed_new, consumed_old;
 455
 456         consumed_old = uatomic_read(&buf->consumed);
 457         consumed_old = consumed_old & (~0xFFFFFFFFL);
 458         consumed_old = consumed_old | uconsumed_old;
 459         consumed_new = SUBBUF_ALIGN(consumed_old, buf->chan);
 460
 461 //ust// spin_lock(&ltt_buf->full_lock);
 462         if (uatomic_cmpxchg(&buf->consumed, consumed_old,
 463                                 consumed_new)
 464             != consumed_old) {
 465                 /* We have been pushed by the writer : the last
 466                  * buffer read _is_ corrupted! It can also
 467                  * happen if this is a buffer we never got. */
 468 //ust//         spin_unlock(&ltt_buf->full_lock);
 469                 return -EIO;
 470         } else {
 471                 /* tell the client that buffer is now unfull */
 472                 int index;
 473                 long data;
 474                 index = SUBBUF_INDEX(consumed_old, buf->chan);
 475                 data = BUFFER_OFFSET(consumed_old, buf->chan);
 476                 ltt_buf_unfull(buf, index, data);
 477 //ust//         spin_unlock(&ltt_buf->full_lock);
 478         }
 479         return 0;
 480 }
 481
 482 //ust// static void switch_buffer(unsigned long data)
 483 //ust// {
 484 //ust//         struct ltt_channel_buf_struct *ltt_buf =
 485 //ust//                 (struct ltt_channel_buf_struct *)data;
 486 //ust//         struct rchan_buf *buf = ltt_buf->rbuf;
 487 //ust//
 488 //ust//         if (buf)
 489 //ust//                 ltt_force_switch(buf, FORCE_ACTIVE);
 490 //ust//
 491 //ust//         ltt_buf->switch_timer.expires += ltt_buf->switch_timer_interval;
 492 //ust//         add_timer_on(&ltt_buf->switch_timer, smp_processor_id());
 493 //ust// }
 494 //ust//
 495 //ust// static void start_switch_timer(struct ltt_channel_struct *ltt_channel)
 496 //ust// {
 497 //ust//         struct rchan *rchan = ltt_channel->trans_channel_data;
 498 //ust//         int cpu;
 499 //ust//
 500 //ust//         if (!ltt_channel->switch_timer_interval)
 501 //ust//                 return;
 502 //ust//
 503 //ust//         // TODO : hotplug
 504 //ust//         for_each_online_cpu(cpu) {
 505 //ust//                 struct ltt_channel_buf_struct *ltt_buf;
 506 //ust//                 struct rchan_buf *buf;
 507 //ust//
 508 //ust//                 buf = rchan->buf[cpu];
 509 //ust//                 ltt_buf = buf->chan_private;
 510 //ust//                 buf->random_access = 1;
 511 //ust//                 ltt_buf->switch_timer_interval =
 512 //ust//                         ltt_channel->switch_timer_interval;
 513 //ust//                 init_timer(&ltt_buf->switch_timer);
 514 //ust//                 ltt_buf->switch_timer.function = switch_buffer;
 515 //ust//                 ltt_buf->switch_timer.expires = jiffies +
 516 //ust//                                         ltt_buf->switch_timer_interval;
 517 //ust//                 ltt_buf->switch_timer.data = (unsigned long)ltt_buf;
 518 //ust//                 add_timer_on(&ltt_buf->switch_timer, cpu);
 519 //ust//         }
 520 //ust// }
 521 //ust//
 522 //ust// /*
 523 //ust//  * Cannot use del_timer_sync with add_timer_on, so use an IPI to locally
 524 //ust//  * delete the timer.
 525 //ust//  */
 526 //ust// static void stop_switch_timer_ipi(void *info)
 527 //ust// {
 528 //ust//         struct ltt_channel_buf_struct *ltt_buf =
 529 //ust//                 (struct ltt_channel_buf_struct *)info;
 530 //ust//
 531 //ust//         del_timer(&ltt_buf->switch_timer);
 532 //ust// }
 533 //ust//
 534 //ust// static void stop_switch_timer(struct ltt_channel_struct *ltt_channel)
 535 //ust// {
 536 //ust//         struct rchan *rchan = ltt_channel->trans_channel_data;
 537 //ust//         int cpu;
 538 //ust//
 539 //ust//         if (!ltt_channel->switch_timer_interval)
 540 //ust//                 return;
 541 //ust//
 542 //ust//         // TODO : hotplug
 543 //ust//         for_each_online_cpu(cpu) {
 544 //ust//                 struct ltt_channel_buf_struct *ltt_buf;
 545 //ust//                 struct rchan_buf *buf;
 546 //ust//
 547 //ust//                 buf = rchan->buf[cpu];
 548 //ust//                 ltt_buf = buf->chan_private;
 549 //ust//                 smp_call_function(stop_switch_timer_ipi, ltt_buf, 1);
 550 //ust//                 buf->random_access = 0;
 551 //ust//         }
 552 //ust// }
 553
 554 //ust// static void ust_buffers_print_written(struct ust_channel *chan,
 555 //ust//                 long cons_off, unsigned int cpu)
 556 //ust// {
 557 //ust//         struct ust_buffer *buf = chan->buf[cpu];
 558 //ust//         long cons_idx, events_count;
 559 //ust//
 560 //ust//         cons_idx = SUBBUF_INDEX(cons_off, chan);
 561 //ust//         events_count = uatomic_read(&buf->commit_count[cons_idx].events);
 562 //ust//
 563 //ust//         if (events_count)
 564 //ust//                 printk(KERN_INFO
 565 //ust//                         "channel %s: %lu events written (cpu %u, index %lu)\n",
 566 //ust//                         chan->channel_name, events_count, cpu, cons_idx);
 567 //ust// }
 568
 569 static void ltt_relay_print_subbuffer_errors(
 570                 struct ust_channel *channel,
 571                 long cons_off, int cpu)
 572 {
 573         struct ust_buffer *ltt_buf = channel->buf[cpu];
 574         long cons_idx, commit_count, commit_count_sb, write_offset;
 575
 576         cons_idx = SUBBUF_INDEX(cons_off, channel);
 577         commit_count = uatomic_read(&ltt_buf->commit_count[cons_idx].cc);
 578         commit_count_sb = uatomic_read(&ltt_buf->commit_count[cons_idx].cc_sb);
 579
 580         /*
 581          * No need to order commit_count and write_offset reads because we
 582          * execute after trace is stopped when there are no readers left.
 583          */
 584         write_offset = uatomic_read(&ltt_buf->offset);
 585         WARN( "LTT : unread channel %s offset is %ld "
 586                 "and cons_off : %ld (cpu %d)\n",
 587                 channel->channel_name, write_offset, cons_off, cpu);
 588         /* Check each sub-buffer for non filled commit count */
 589         if (((commit_count - channel->subbuf_size) & channel->commit_count_mask)
 590             - (BUFFER_TRUNC(cons_off, channel) >> channel->n_subbufs_order) != 0) {
 591                 ERR("LTT : %s : subbuffer %lu has non filled "
 592                         "commit count [cc, cc_sb] [%lu,%lu].\n",
 593                         channel->channel_name, cons_idx, commit_count, commit_count_sb);
 594         }
 595         ERR("LTT : %s : commit count : %lu, subbuf size %zd\n",
 596                         channel->channel_name, commit_count,
 597                         channel->subbuf_size);
 598 }
 599
 600 static void ltt_relay_print_errors(struct ust_trace *trace,
 601                 struct ust_channel *channel, int cpu)
 602 {
 603         struct ust_buffer *ltt_buf = channel->buf[cpu];
 604         long cons_off;
 605
 606         /*
 607          * Can be called in the error path of allocation when
 608          * trans_channel_data is not yet set.
 609          */
 610         if (!channel)
 611                 return;
 612
 613 //ust// for (cons_off = 0; cons_off < rchan->alloc_size;
 614 //ust//      cons_off = SUBBUF_ALIGN(cons_off, rchan))
 615 //ust//         ust_buffers_print_written(ltt_chan, cons_off, cpu);
 616         for (cons_off = uatomic_read(&ltt_buf->consumed);
 617                         (SUBBUF_TRUNC(uatomic_read(&ltt_buf->offset),
 618                                       channel)
 619                          - cons_off) > 0;
 620                         cons_off = SUBBUF_ALIGN(cons_off, channel))
 621                 ltt_relay_print_subbuffer_errors(channel, cons_off, cpu);
 622 }
 623
 624 static void ltt_relay_print_buffer_errors(struct ust_channel *channel, int cpu)
 625 {
 626         struct ust_trace *trace = channel->trace;
 627         struct ust_buffer *ltt_buf = channel->buf[cpu];
 628
 629         if (uatomic_read(&ltt_buf->events_lost))
 630                 ERR("channel %s: %ld events lost (cpu %d)",
 631                         channel->channel_name,
 632                         uatomic_read(&ltt_buf->events_lost), cpu);
 633         if (uatomic_read(&ltt_buf->corrupted_subbuffers))
 634                 ERR("channel %s : %ld corrupted subbuffers (cpu %d)",
 635                         channel->channel_name,
 636                         uatomic_read(&ltt_buf->corrupted_subbuffers), cpu);
 637
 638         ltt_relay_print_errors(trace, channel, cpu);
 639 }
 640
 641 static void ltt_relay_release_channel(struct kref *kref)
 642 {
 643         struct ust_channel *ltt_chan = container_of(kref,
 644                         struct ust_channel, kref);
 645         free(ltt_chan->buf);
 646 }
 647
 648 /*
 649  * Create ltt buffer.
 650  */
 651 //ust// static int ltt_relay_create_buffer(struct ust_trace *trace,
 652 //ust//                 struct ltt_channel_struct *ltt_chan, struct rchan_buf *buf,
 653 //ust//                 unsigned int cpu, unsigned int n_subbufs)
 654 //ust// {
 655 //ust//         struct ltt_channel_buf_struct *ltt_buf =
 656 //ust//                 percpu_ptr(ltt_chan->buf, cpu);
 657 //ust//         unsigned int j;
 658 //ust//
 659 //ust//         ltt_buf->commit_count =
 660 //ust//                 kzalloc_node(sizeof(ltt_buf->commit_count) * n_subbufs,
 661 //ust//                         GFP_KERNEL, cpu_to_node(cpu));
 662 //ust//         if (!ltt_buf->commit_count)
 663 //ust//                 return -ENOMEM;
 664 //ust//         kref_get(&trace->kref);
 665 //ust//         kref_get(&trace->ltt_transport_kref);
 666 //ust//         kref_get(&ltt_chan->kref);
 667 //ust//         uatomic_set(&ltt_buf->offset, ltt_subbuffer_header_size());
 668 //ust//         uatomic_set(&ltt_buf->consumed, 0);
 669 //ust//         uatomic_set(&ltt_buf->active_readers, 0);
 670 //ust//         for (j = 0; j < n_subbufs; j++)
 671 //ust//                 uatomic_set(&ltt_buf->commit_count[j], 0);
 672 //ust//         init_waitqueue_head(&ltt_buf->write_wait);
 673 //ust//         uatomic_set(&ltt_buf->wakeup_readers, 0);
 674 //ust//         spin_lock_init(&ltt_buf->full_lock);
 675 //ust//
 676 //ust//         ltt_buffer_begin_callback(buf, trace->start_tsc, 0);
 677 //ust//         /* atomic_add made on local variable on data that belongs to
 678 //ust//          * various CPUs : ok because tracing not started (for this cpu). */
 679 //ust//         uatomic_add(&ltt_buf->commit_count[0], ltt_subbuffer_header_size());
 680 //ust//
 681 //ust//         uatomic_set(&ltt_buf->events_lost, 0);
 682 //ust//         uatomic_set(&ltt_buf->corrupted_subbuffers, 0);
 683 //ust//
 684 //ust//         return 0;
 685 //ust// }
 686
 687 static int ust_buffers_init_buffer(struct ust_trace *trace,
 688                 struct ust_channel *ltt_chan, struct ust_buffer *buf,
 689                 unsigned int n_subbufs)
 690 {
 691         unsigned int j;
 692         int fds[2];
 693         int result;
 694
 695         buf->commit_count =
 696                 zmalloc(sizeof(*buf->commit_count) * n_subbufs);
 697         if (!buf->commit_count)
 698                 return -ENOMEM;
 699         kref_get(&trace->kref);
 700         kref_get(&trace->ltt_transport_kref);
 701         kref_get(&ltt_chan->kref);
 702         uatomic_set(&buf->offset, ltt_subbuffer_header_size());
 703         uatomic_set(&buf->consumed, 0);
 704         uatomic_set(&buf->active_readers, 0);
 705         for (j = 0; j < n_subbufs; j++) {
 706                 uatomic_set(&buf->commit_count[j].cc, 0);
 707                 uatomic_set(&buf->commit_count[j].cc_sb, 0);
 708         }
 709 //ust// init_waitqueue_head(&buf->write_wait);
 710 //ust// uatomic_set(&buf->wakeup_readers, 0);
 711 //ust// spin_lock_init(&buf->full_lock);
 712
 713         ltt_buffer_begin(buf, trace->start_tsc, 0);
 714
 715         uatomic_add(&buf->commit_count[0].cc, ltt_subbuffer_header_size());
 716
 717         uatomic_set(&buf->events_lost, 0);
 718         uatomic_set(&buf->corrupted_subbuffers, 0);
 719
 720         result = pipe(fds);
 721         if(result == -1) {
 722                 PERROR("pipe");
 723                 return -1;
 724         }
 725         buf->data_ready_fd_read = fds[0];
 726         buf->data_ready_fd_write = fds[1];
 727
 728         /* FIXME: do we actually need this? */
 729         result = fcntl(fds[0], F_SETFL, O_NONBLOCK);
 730         if(result == -1) {
 731                 PERROR("fcntl");
 732         }
 733
 734 //ust// buf->commit_seq = malloc(sizeof(buf->commit_seq) * n_subbufs);
 735 //ust// if(!ltt_buf->commit_seq) {
 736 //ust//         return -1;
 737 //ust// }
 738         memset(buf->commit_seq, 0, sizeof(buf->commit_seq[0]) * n_subbufs);
 739
 740         /* FIXME: decrementally destroy on error */
 741
 742         return 0;
 743 }
 744
 745 /* FIXME: use this function */
 746 static void ust_buffers_destroy_buffer(struct ust_channel *ltt_chan, int cpu)
 747 {
 748         struct ust_trace *trace = ltt_chan->trace;
 749         struct ust_buffer *ltt_buf = ltt_chan->buf[cpu];
 750
 751         kref_put(&ltt_chan->trace->ltt_transport_kref,
 752                 ltt_release_transport);
 753         ltt_relay_print_buffer_errors(ltt_chan, cpu);
 754 //ust// free(ltt_buf->commit_seq);
 755         free(ltt_buf->commit_count);
 756         ltt_buf->commit_count = NULL;
 757         kref_put(&ltt_chan->kref, ltt_relay_release_channel);
 758         kref_put(&trace->kref, ltt_release_trace);
 759 //ust// wake_up_interruptible(&trace->kref_wq);
 760 }
 761
 762 static int ust_buffers_alloc_channel_buf_structs(struct ust_channel *chan)
 763 {
 764         void *ptr;
 765         int result;
 766         size_t size;
 767         int i;
 768
 769         size = PAGE_ALIGN(1);
 770
 771         for(i=0; i<chan->n_cpus; i++) {
 772
 773                 result = chan->buf_struct_shmids[i] = shmget(getpid(), size, IPC_CREAT | IPC_EXCL | 0700);
 774                 if(result == -1) {
 775                         PERROR("shmget");
 776                         goto destroy_previous;
 777                 }
 778
 779                 /* FIXME: should have matching call to shmdt */
 780                 ptr = shmat(chan->buf_struct_shmids[i], NULL, 0);
 781                 if(ptr == (void *) -1) {
 782                         perror("shmat");
 783                         goto destroy_shm;
 784                 }
 785
 786                 /* Already mark the shared memory for destruction. This will occur only
 787                  * when all users have detached.
 788                  */
 789                 result = shmctl(chan->buf_struct_shmids[i], IPC_RMID, NULL);
 790                 if(result == -1) {
 791                         perror("shmctl");
 792                         goto destroy_previous;
 793                 }
 794
 795                 chan->buf[i] = ptr;
 796         }
 797
 798         return 0;
 799
 800         /* Jumping inside this loop occurs from within the other loop above with i as
 801          * counter, so it unallocates the structures for the cpu = current_i down to
 802          * zero. */
 803         for(; i>=0; i--) {
 804                 destroy_shm:
 805                 result = shmctl(chan->buf_struct_shmids[i], IPC_RMID, NULL);
 806                 if(result == -1) {
 807                         perror("shmctl");
 808                 }
 809
 810                 destroy_previous:
 811                 continue;
 812         }
 813
 814         return -1;
 815 }
 816
 817 /*
 818  * Create channel.
 819  */
 820 static int ust_buffers_create_channel(const char *trace_name, struct ust_trace *trace,
 821         const char *channel_name, struct ust_channel *ltt_chan,
 822         unsigned int subbuf_size, unsigned int n_subbufs, int overwrite)
 823 {
 824         int result;
 825
 826         kref_init(&ltt_chan->kref);
 827
 828         ltt_chan->trace = trace;
 829         ltt_chan->overwrite = overwrite;
 830         ltt_chan->n_subbufs_order = get_count_order(n_subbufs);
 831         ltt_chan->commit_count_mask = (~0UL >> ltt_chan->n_subbufs_order);
 832         ltt_chan->n_cpus = get_n_cpus();
 833 //ust// ltt_chan->buf = percpu_alloc_mask(sizeof(struct ltt_channel_buf_struct), GFP_KERNEL, cpu_possible_map);
 834         ltt_chan->buf = (void *) malloc(ltt_chan->n_cpus * sizeof(void *));
 835         if(ltt_chan->buf == NULL) {
 836                 goto error;
 837         }
 838         ltt_chan->buf_struct_shmids = (int *) malloc(ltt_chan->n_cpus * sizeof(int));
 839         if(ltt_chan->buf_struct_shmids == NULL)
 840                 goto free_buf;
 841
 842         result = ust_buffers_alloc_channel_buf_structs(ltt_chan);
 843         if(result != 0) {
 844                 goto free_buf_struct_shmids;
 845         }
 846
 847         result = ust_buffers_channel_open(ltt_chan, subbuf_size, n_subbufs);
 848         if (result != 0) {
 849                 ERR("Cannot open channel for trace %s", trace_name);
 850                 goto unalloc_buf_structs;
 851         }
 852
 853         return 0;
 854
 855 unalloc_buf_structs:
 856         /* FIXME: put a call here to unalloc the buf structs! */
 857
 858 free_buf_struct_shmids:
 859         free(ltt_chan->buf_struct_shmids);
 860
 861 free_buf:
 862         free(ltt_chan->buf);
 863
 864 error:
 865         return -1;
 866 }
 867
 868 /*
 869  * LTTng channel flush function.
 870  *
 871  * Must be called when no tracing is active in the channel, because of
 872  * accesses across CPUs.
 873  */
 874 static notrace void ltt_relay_buffer_flush(struct ust_buffer *buf)
 875 {
 876         int result;
 877
 878 //ust// buf->finalized = 1;
 879         ltt_force_switch(buf, FORCE_FLUSH);
 880
 881         result = write(buf->data_ready_fd_write, "1", 1);
 882         if(result == -1) {
 883                 PERROR("write (in ltt_relay_buffer_flush)");
 884                 ERR("this should never happen!");
 885         }
 886 }
 887
/*
 * Asynchronous reader wakeup for a channel.
 *
 * Currently a no-op: the whole body is commented out. The disabled code
 * below walked every possible cpu and woke the readers of each buffer whose
 * wakeup_readers flag was set.
 */
static void ltt_relay_async_wakeup_chan(struct ust_channel *ltt_channel)
{
//ust// unsigned int i;
//ust// struct rchan *rchan = ltt_channel->trans_channel_data;
//ust//
//ust// for_each_possible_cpu(i) {
//ust//         struct ltt_channel_buf_struct *ltt_buf =
//ust//                 percpu_ptr(ltt_channel->buf, i);
//ust//
//ust//         if (uatomic_read(&ltt_buf->wakeup_readers) == 1) {
//ust//                 uatomic_set(&ltt_buf->wakeup_readers, 0);
//ust//                 wake_up_interruptible(&rchan->buf[i]->read_wait);
//ust//         }
//ust// }
}
 903
 904 static void ltt_relay_finish_buffer(struct ust_channel *channel, unsigned int cpu)
 905 {
 906 //      int result;
 907
 908         if (channel->buf[cpu]) {
 909                 struct ust_buffer *buf = channel->buf[cpu];
 910                 ltt_relay_buffer_flush(buf);
 911 //ust//         ltt_relay_wake_writers(ltt_buf);
 912                 /* closing the pipe tells the consumer the buffer is finished */
 913
 914                 //result = write(ltt_buf->data_ready_fd_write, "D", 1);
 915                 //if(result == -1) {
 916                 //      PERROR("write (in ltt_relay_finish_buffer)");
 917                 //      ERR("this should never happen!");
 918                 //}
 919                 close(buf->data_ready_fd_write);
 920         }
 921 }
 922
 923
 924 static void ltt_relay_finish_channel(struct ust_channel *channel)
 925 {
 926         unsigned int i;
 927
 928         for(i=0; i<channel->n_cpus; i++) {
 929                 ltt_relay_finish_buffer(channel, i);
 930         }
 931 }
 932
 933 static void ltt_relay_remove_channel(struct ust_channel *channel)
 934 {
 935         ust_buffers_channel_close(channel);
 936         kref_put(&channel->kref, ltt_relay_release_channel);
 937 }
 938
 939 //ust// /*
 940 //ust//  * Returns :
 941 //ust//  * 0 if ok
 942 //ust//  * !0 if execution must be aborted.
 943 //ust//  */
 944 //ust// static inline int ltt_relay_try_reserve(
 945 //ust//                 struct ust_channel *channel, struct ust_buffer *buf,
 946 //ust//                 struct ltt_reserve_switch_offsets *offsets, size_t data_size,
 947 //ust//                 u64 *tsc, unsigned int *rflags, int largest_align)
 948 //ust// {
 949 //ust//         offsets->begin = uatomic_read(&buf->offset);
 950 //ust//         offsets->old = offsets->begin;
 951 //ust//         offsets->begin_switch = 0;
 952 //ust//         offsets->end_switch_current = 0;
 953 //ust//         offsets->end_switch_old = 0;
 954 //ust//
 955 //ust//         *tsc = trace_clock_read64();
 956 //ust//         if (last_tsc_overflow(buf, *tsc))
 957 //ust//                 *rflags = LTT_RFLAG_ID_SIZE_TSC;
 958 //ust//
 959 //ust//         if (SUBBUF_OFFSET(offsets->begin, buf->chan) == 0) {
 960 //ust//                 offsets->begin_switch = 1;              /* For offsets->begin */
 961 //ust//         } else {
 962 //ust//                 offsets->size = ust_get_header_size(channel,
 963 //ust//                                         offsets->begin, data_size,
 964 //ust//                                         &offsets->before_hdr_pad, *rflags);
 965 //ust//                 offsets->size += ltt_align(offsets->begin + offsets->size,
 966 //ust//                                            largest_align)
 967 //ust//                                  + data_size;
 968 //ust//                 if ((SUBBUF_OFFSET(offsets->begin, buf->chan) + offsets->size)
 969 //ust//                                 > buf->chan->subbuf_size) {
 970 //ust//                         offsets->end_switch_old = 1;    /* For offsets->old */
 971 //ust//                         offsets->begin_switch = 1;      /* For offsets->begin */
 972 //ust//                 }
 973 //ust//         }
 974 //ust//         if (offsets->begin_switch) {
 975 //ust//                 long subbuf_index;
 976 //ust//
 977 //ust//                 if (offsets->end_switch_old)
 978 //ust//                         offsets->begin = SUBBUF_ALIGN(offsets->begin,
 979 //ust//                                                       buf->chan);
 980 //ust//                 offsets->begin = offsets->begin + ltt_subbuffer_header_size();
 981 //ust//                 /* Test new buffer integrity */
 982 //ust//                 subbuf_index = SUBBUF_INDEX(offsets->begin, buf->chan);
 983 //ust//                 offsets->reserve_commit_diff =
 984 //ust//                         (BUFFER_TRUNC(offsets->begin, buf->chan)
 985 //ust//                          >> channel->n_subbufs_order)
 986 //ust//                         - (uatomic_read(&buf->commit_count[subbuf_index])
 987 //ust//                                 & channel->commit_count_mask);
 988 //ust//                 if (offsets->reserve_commit_diff == 0) {
 989 //ust//                         long consumed;
 990 //ust//
 991 //ust//                         consumed = uatomic_read(&buf->consumed);
 992 //ust//
 993 //ust//                         /* Next buffer not corrupted. */
 994 //ust//                         if (!channel->overwrite &&
 995 //ust//                                 (SUBBUF_TRUNC(offsets->begin, buf->chan)
 996 //ust//                                  - SUBBUF_TRUNC(consumed, buf->chan))
 997 //ust//                                 >= channel->alloc_size) {
 998 //ust//
 999 //ust//                                 long consumed_idx = SUBBUF_INDEX(consumed, buf->chan);
1000 //ust//                                 long commit_count = uatomic_read(&buf->commit_count[consumed_idx]);
1001 //ust//                                 if(((commit_count - buf->chan->subbuf_size) & channel->commit_count_mask) - (BUFFER_TRUNC(consumed, buf->chan) >> channel->n_subbufs_order) != 0) {
1002 //ust//                                         WARN("Event dropped. Caused by non-committed event.");
1003 //ust//                                 }
1004 //ust//                                 else {
1005 //ust//                                         WARN("Event dropped. Caused by non-consumed buffer.");
1006 //ust//                                 }
1007 //ust//                                 /*
1008 //ust//                                  * We do not overwrite non consumed buffers
1009 //ust//                                  * and we are full : event is lost.
1010 //ust//                                  */
1011 //ust//                                 uatomic_inc(&buf->events_lost);
1012 //ust//                                 return -1;
1013 //ust//                         } else {
1014 //ust//                                 /*
1015 //ust//                                  * next buffer not corrupted, we are either in
1016 //ust//                                  * overwrite mode or the buffer is not full.
1017 //ust//                                  * It's safe to write in this new subbuffer.
1018 //ust//                                  */
1019 //ust//                         }
1020 //ust//                 } else {
1021 //ust//                         /*
1022 //ust//                          * Next subbuffer corrupted. Force pushing reader even
1023 //ust//                          * in normal mode. It's safe to write in this new
1024 //ust//                          * subbuffer.
1025 //ust//                          */
1026 //ust//                 }
1027 //ust//                 offsets->size = ust_get_header_size(channel,
1028 //ust//                                         offsets->begin, data_size,
1029 //ust//                                         &offsets->before_hdr_pad, *rflags);
1030 //ust//                 offsets->size += ltt_align(offsets->begin + offsets->size,
1031 //ust//                                            largest_align)
1032 //ust//                                  + data_size;
1033 //ust//                 if ((SUBBUF_OFFSET(offsets->begin, buf->chan) + offsets->size)
1034 //ust//                                 > buf->chan->subbuf_size) {
1035 //ust//                         /*
1036 //ust//                          * Event too big for subbuffers, report error, don't
1037 //ust//                          * complete the sub-buffer switch.
1038 //ust//                          */
1039 //ust//                         uatomic_inc(&buf->events_lost);
1040 //ust//                         return -1;
1041 //ust//                 } else {
1042 //ust//                         /*
1043 //ust//                          * We just made a successful buffer switch and the event
1044 //ust//                          * fits in the new subbuffer. Let's write.
1045 //ust//                          */
1046 //ust//                 }
1047 //ust//         } else {
1048 //ust//                 /*
1049 //ust//                  * Event fits in the current buffer and we are not on a switch
1050 //ust//                  * boundary. It's safe to write.
1051 //ust//                  */
1052 //ust//         }
1053 //ust//         offsets->end = offsets->begin + offsets->size;
1054 //ust//
1055 //ust//         if ((SUBBUF_OFFSET(offsets->end, buf->chan)) == 0) {
1056 //ust//                 /*
1057 //ust//                  * The offset_end will fall at the very beginning of the next
1058 //ust//                  * subbuffer.
1059 //ust//                  */
1060 //ust//                 offsets->end_switch_current = 1;        /* For offsets->begin */
1061 //ust//         }
1062 //ust//         return 0;
1063 //ust// }
1064 //ust//
1065 //ust// /*
1066 //ust//  * Returns :
1067 //ust//  * 0 if ok
1068 //ust//  * !0 if execution must be aborted.
1069 //ust//  */
1070 //ust// static inline int ltt_relay_try_switch(
1071 //ust//                 enum force_switch_mode mode,
1072 //ust//                 struct ust_channel *channel,
1073 //ust//                 struct ust_buffer *buf,
1074 //ust//                 struct ltt_reserve_switch_offsets *offsets,
1075 //ust//                 u64 *tsc)
1076 //ust// {
1077 //ust//         long subbuf_index;
1078 //ust//
1079 //ust//         offsets->begin = uatomic_read(&buf->offset);
1080 //ust//         offsets->old = offsets->begin;
1081 //ust//         offsets->begin_switch = 0;
1082 //ust//         offsets->end_switch_old = 0;
1083 //ust//
1084 //ust//         *tsc = trace_clock_read64();
1085 //ust//
1086 //ust//         if (SUBBUF_OFFSET(offsets->begin, buf->chan) != 0) {
1087 //ust//                 offsets->begin = SUBBUF_ALIGN(offsets->begin, buf->chan);
1088 //ust//                 offsets->end_switch_old = 1;
1089 //ust//         } else {
1090 //ust//                 /* we do not have to switch : buffer is empty */
1091 //ust//                 return -1;
1092 //ust//         }
1093 //ust//         if (mode == FORCE_ACTIVE)
1094 //ust//                 offsets->begin += ltt_subbuffer_header_size();
1095 //ust//         /*
1096 //ust//          * Always begin_switch in FORCE_ACTIVE mode.
1097 //ust//          * Test new buffer integrity
1098 //ust//          */
1099 //ust//         subbuf_index = SUBBUF_INDEX(offsets->begin, buf->chan);
1100 //ust//         offsets->reserve_commit_diff =
1101 //ust//                 (BUFFER_TRUNC(offsets->begin, buf->chan)
1102 //ust//                  >> channel->n_subbufs_order)
1103 //ust//                 - (uatomic_read(&buf->commit_count[subbuf_index])
1104 //ust//                         & channel->commit_count_mask);
1105 //ust//         if (offsets->reserve_commit_diff == 0) {
1106 //ust//                 /* Next buffer not corrupted. */
1107 //ust//                 if (mode == FORCE_ACTIVE
1108 //ust//                     && !channel->overwrite
1109 //ust//                     && offsets->begin - uatomic_read(&buf->consumed)
1110 //ust//                        >= channel->alloc_size) {
1111 //ust//                         /*
1112 //ust//                          * We do not overwrite non consumed buffers and we are
1113 //ust//                          * full : ignore switch while tracing is active.
1114 //ust//                          */
1115 //ust//                         return -1;
1116 //ust//                 }
1117 //ust//         } else {
1118 //ust//                 /*
1119 //ust//                  * Next subbuffer corrupted. Force pushing reader even in normal
1120 //ust//                  * mode
1121 //ust//                  */
1122 //ust//         }
1123 //ust//         offsets->end = offsets->begin;
1124 //ust//         return 0;
1125 //ust// }
1126 //ust//
1127 //ust// static inline void ltt_reserve_push_reader(
1128 //ust//                 struct ust_channel *channel,
1129 //ust//                 struct ust_buffer *buf,
1130 //ust//                 struct ltt_reserve_switch_offsets *offsets)
1131 //ust// {
1132 //ust//         long consumed_old, consumed_new;
1133 //ust//
1134 //ust//         do {
1135 //ust//                 consumed_old = uatomic_read(&buf->consumed);
1136 //ust//                 /*
1137 //ust//                  * If buffer is in overwrite mode, push the reader consumed
1138 //ust//                  * count if the write position has reached it and we are not
1139 //ust//                  * at the first iteration (don't push the reader farther than
1140 //ust//                  * the writer). This operation can be done concurrently by many
1141 //ust//                  * writers in the same buffer, the writer being at the farthest
1142 //ust//                  * write position sub-buffer index in the buffer being the one
1143 //ust//                  * which will win this loop.
1144 //ust//                  * If the buffer is not in overwrite mode, pushing the reader
1145 //ust//                  * only happens if a sub-buffer is corrupted.
1146 //ust//                  */
1147 //ust//                 if ((SUBBUF_TRUNC(offsets->end-1, buf->chan)
1148 //ust//                    - SUBBUF_TRUNC(consumed_old, buf->chan))
1149 //ust//                    >= channel->alloc_size)
1150 //ust//                         consumed_new = SUBBUF_ALIGN(consumed_old, buf->chan);
1151 //ust//                 else {
1152 //ust//                         consumed_new = consumed_old;
1153 //ust//                         break;
1154 //ust//                 }
1155 //ust//         } while (uatomic_cmpxchg(&buf->consumed, consumed_old,
1156 //ust//                         consumed_new) != consumed_old);
1157 //ust//
1158 //ust//         if (consumed_old != consumed_new) {
1159 //ust//                 /*
1160 //ust//                  * Reader pushed : we are the winner of the push, we can
1161 //ust//                  * therefore reequilibrate reserve and commit. Atomic increment
1162 //ust//                  * of the commit count permits other writers to play around
1163 //ust//                  * with this variable before us. We keep track of
1164 //ust//                  * corrupted_subbuffers even in overwrite mode :
1165 //ust//                  * we never want to write over a non completely committed
1166 //ust//                  * sub-buffer : possible causes : the buffer size is too low
1167 //ust//                  * compared to the unordered data input, or there is a writer
1168 //ust//                  * that died between the reserve and the commit.
1169 //ust//                  */
1170 //ust//                 if (offsets->reserve_commit_diff) {
1171 //ust//                         /*
1172 //ust//                          * We have to alter the sub-buffer commit count.
1173 //ust//                          * We do not deliver the previous subbuffer, given it
1174 //ust//                          * was either corrupted or not consumed (overwrite
1175 //ust//                          * mode).
1176 //ust//                          */
1177 //ust//                         uatomic_add(&buf->commit_count[SUBBUF_INDEX(offsets->begin, buf->chan)],
1178 //ust//                                 offsets->reserve_commit_diff);
1179 //ust//                         if (!channel->overwrite
1180 //ust//                             || offsets->reserve_commit_diff
1181 //ust//                                != channel->subbuf_size) {
1182 //ust//                                 /*
1183 //ust//                                  * The reserve commit diff was not subbuf_size :
1184 //ust//                                  * it means the subbuffer was partly written to
1185 //ust//                                  * and is therefore corrupted. If it is multiple
1186 //ust//                                  * of subbuffer size and we are in flight
1187 //ust//                                  * recorder mode, we are skipping over a whole
1188 //ust//                                  * subbuffer.
1189 //ust//                                  */
1190 //ust//                                 uatomic_inc(&buf->corrupted_subbuffers);
1191 //ust//                         }
1192 //ust//                 }
1193 //ust//         }
1194 //ust// }
1195 //ust//
1196 //ust// /**
1197 //ust//  * ltt_relay_reserve_slot - Atomic slot reservation in a LTTng buffer.
1198 //ust//  * @trace: the trace structure to log to.
1199 //ust//  * @ltt_channel: channel structure
1200 //ust//  * @transport_data: data structure specific to ltt relay
1201 //ust//  * @data_size: size of the variable length data to log.
1202 //ust//  * @slot_size: pointer to total size of the slot (out)
1203 //ust//  * @buf_offset : pointer to reserved buffer offset (out)
1204 //ust//  * @tsc: pointer to the tsc at the slot reservation (out)
1205 //ust//  * @cpu: cpuid
1206 //ust//  *
1207 //ust//  * Return : -ENOSPC if not enough space, else returns 0.
1208 //ust//  * It will take care of sub-buffer switching.
1209 //ust//  */
1210 //ust// static notrace int ltt_relay_reserve_slot(struct ust_trace *trace,
1211 //ust//                 struct ust_channel *channel, void **transport_data,
1212 //ust//                 size_t data_size, size_t *slot_size, long *buf_offset, u64 *tsc,
1213 //ust//                 unsigned int *rflags, int largest_align, int cpu)
1214 //ust// {
1215 //ust//         struct ust_buffer *buf = *transport_data = channel->buf[cpu];
1216 //ust//         struct ltt_reserve_switch_offsets offsets;
1217 //ust//
1218 //ust//         offsets.reserve_commit_diff = 0;
1219 //ust//         offsets.size = 0;
1220 //ust//
1221 //ust//         /*
1222 //ust//          * Perform retryable operations.
1223 //ust//          */
1224 //ust//         if (ltt_nesting > 4) {
1225 //ust//                 uatomic_inc(&buf->events_lost);
1226 //ust//                 return -EPERM;
1227 //ust//         }
1228 //ust//         do {
1229 //ust//                 if (ltt_relay_try_reserve(channel, buf, &offsets, data_size, tsc, rflags,
1230 //ust//                                 largest_align))
1231 //ust//                         return -ENOSPC;
1232 //ust//         } while (uatomic_cmpxchg(&buf->offset, offsets.old,
1233 //ust//                         offsets.end) != offsets.old);
1234 //ust//
1235 //ust//         /*
1236 //ust//          * Atomically update last_tsc. This update races against concurrent
1237 //ust//          * atomic updates, but the race will always cause supplementary full TSC
1238 //ust//          * events, never the opposite (missing a full TSC event when it would be
1239 //ust//          * needed).
1240 //ust//          */
1241 //ust//         save_last_tsc(buf, *tsc);
1242 //ust//
1243 //ust//         /*
1244 //ust//          * Push the reader if necessary
1245 //ust//          */
1246 //ust//         ltt_reserve_push_reader(channel, buf, &offsets);
1247 //ust//
1248 //ust//         /*
1249 //ust//          * Switch old subbuffer if needed.
1250 //ust//          */
1251 //ust//         if (offsets.end_switch_old)
1252 //ust//                 ltt_reserve_switch_old_subbuf(channel, buf, &offsets, tsc);
1253 //ust//
1254 //ust//         /*
1255 //ust//          * Populate new subbuffer.
1256 //ust//          */
1257 //ust//         if (offsets.begin_switch)
1258 //ust//                 ltt_reserve_switch_new_subbuf(channel, buf, &offsets, tsc);
1259 //ust//
1260 //ust//         if (offsets.end_switch_current)
1261 //ust//                 ltt_reserve_end_switch_current(channel, buf, &offsets, tsc);
1262 //ust//
1263 //ust//         *slot_size = offsets.size;
1264 //ust//         *buf_offset = offsets.begin + offsets.before_hdr_pad;
1265 //ust//         return 0;
1266 //ust// }
1267 //ust//
1268 //ust// /*
1269 //ust//  * Force a sub-buffer switch for a per-cpu buffer. This operation is
1270 //ust//  * completely reentrant : can be called while tracing is active with
1271 //ust//  * absolutely no lock held.
1272 //ust//  */
1273 //ust// static notrace void ltt_force_switch(struct ust_buffer *buf,
1274 //ust//                 enum force_switch_mode mode)
1275 //ust// {
1276 //ust//         struct ust_channel *channel = buf->chan;
1277 //ust//         struct ltt_reserve_switch_offsets offsets;
1278 //ust//         u64 tsc;
1279 //ust//
1280 //ust//         offsets.reserve_commit_diff = 0;
1281 //ust//         offsets.size = 0;
1282 //ust//
1283 //ust//         /*
1284 //ust//          * Perform retryable operations.
1285 //ust//          */
1286 //ust//         do {
1287 //ust//                 if (ltt_relay_try_switch(mode, channel, buf, &offsets, &tsc))
1288 //ust//                         return;
1289 //ust//         } while (uatomic_cmpxchg(&buf->offset, offsets.old,
1290 //ust//                         offsets.end) != offsets.old);
1291 //ust//
1292 //ust//         /*
1293 //ust//          * Atomically update last_tsc. This update races against concurrent
1294 //ust//          * atomic updates, but the race will always cause supplementary full TSC
1295 //ust//          * events, never the opposite (missing a full TSC event when it would be
1296 //ust//          * needed).
1297 //ust//          */
1298 //ust//         save_last_tsc(buf, tsc);
1299 //ust//
1300 //ust//         /*
1301 //ust//          * Push the reader if necessary
1302 //ust//          */
1303 //ust//         if (mode == FORCE_ACTIVE)
1304 //ust//                 ltt_reserve_push_reader(channel, buf, &offsets);
1305 //ust//
1306 //ust//         /*
1307 //ust//          * Switch old subbuffer if needed.
1308 //ust//          */
1309 //ust//         if (offsets.end_switch_old)
1310 //ust//                 ltt_reserve_switch_old_subbuf(channel, buf, &offsets, &tsc);
1311 //ust//
1312 //ust//         /*
1313 //ust//          * Populate new subbuffer.
1314 //ust//          */
1315 //ust//         if (mode == FORCE_ACTIVE)
1316 //ust//                 ltt_reserve_switch_new_subbuf(channel, buf, &offsets, &tsc);
1317 //ust// }
1318
1319 /*
1320  * ltt_reserve_switch_old_subbuf: switch old subbuffer
1321  *
1322  * Concurrency safe because we are the last and only thread to alter this
1323  * sub-buffer. As long as it is not delivered and read, no other thread can
1324  * alter the offset, alter the reserve_count or call the
1325  * client_buffer_end_callback on this sub-buffer.
1326  *
1327  * The only remaining threads could be the ones with pending commits. They will
1328  * have to do the deliver themselves.  Not concurrency safe in overwrite mode.
1329  * We detect corrupted subbuffers with commit and reserve counts. We keep a
1330  * corrupted sub-buffers count and push the readers across these sub-buffers.
1331  *
1332  * Not concurrency safe if a writer is stalled in a subbuffer and another writer
1333  * switches in, finding out it's corrupted.  The result will be than the old
1334  * (uncommited) subbuffer will be declared corrupted, and that the new subbuffer
1335  * will be declared corrupted too because of the commit count adjustment.
1336  *
1337  * Note : offset_old should never be 0 here.
1338  */
1339 static void ltt_reserve_switch_old_subbuf(
1340                 struct ust_channel *chan, struct ust_buffer *buf,
1341                 struct ltt_reserve_switch_offsets *offsets, u64 *tsc)
1342 {
1343         long oldidx = SUBBUF_INDEX(offsets->old - 1, chan);
1344         long commit_count, padding_size;
1345
1346         padding_size = chan->subbuf_size
1347                         - (SUBBUF_OFFSET(offsets->old - 1, chan) + 1);
1348         ltt_buffer_end(buf, *tsc, offsets->old, oldidx);
1349
1350         /*
1351          * Must write slot data before incrementing commit count.
1352          * This compiler barrier is upgraded into a smp_wmb() by the IPI
1353          * sent by get_subbuf() when it does its smp_rmb().
1354          */
1355         barrier();
1356         uatomic_add(&buf->commit_count[oldidx].cc, padding_size);
1357         commit_count = uatomic_read(&buf->commit_count[oldidx].cc);
1358         ltt_check_deliver(chan, buf, offsets->old - 1, commit_count, oldidx);
1359         ltt_write_commit_counter(chan, buf, oldidx,
1360                 offsets->old, commit_count, padding_size);
1361 }
1362
1363 /*
1364  * ltt_reserve_switch_new_subbuf: Populate new subbuffer.
1365  *
1366  * This code can be executed unordered : writers may already have written to the
1367  * sub-buffer before this code gets executed, caution.  The commit makes sure
1368  * that this code is executed before the deliver of this sub-buffer.
1369  */
1370 static void ltt_reserve_switch_new_subbuf(
1371                 struct ust_channel *chan, struct ust_buffer *buf,
1372                 struct ltt_reserve_switch_offsets *offsets, u64 *tsc)
1373 {
1374         long beginidx = SUBBUF_INDEX(offsets->begin, chan);
1375         long commit_count;
1376
1377         ltt_buffer_begin(buf, *tsc, beginidx);
1378
1379         /*
1380          * Must write slot data before incrementing commit count.
1381          * This compiler barrier is upgraded into a smp_wmb() by the IPI
1382          * sent by get_subbuf() when it does its smp_rmb().
1383          */
1384         barrier();
1385         uatomic_add(&buf->commit_count[beginidx].cc, ltt_subbuffer_header_size());
1386         commit_count = uatomic_read(&buf->commit_count[beginidx].cc);
1387         /* Check if the written buffer has to be delivered */
1388         ltt_check_deliver(chan, buf, offsets->begin, commit_count, beginidx);
1389         ltt_write_commit_counter(chan, buf, beginidx,
1390                 offsets->begin, commit_count, ltt_subbuffer_header_size());
1391 }
1392
1393 /*
1394  * ltt_reserve_end_switch_current: finish switching current subbuffer
1395  *
1396  * Concurrency safe because we are the last and only thread to alter this
1397  * sub-buffer. As long as it is not delivered and read, no other thread can
1398  * alter the offset, alter the reserve_count or call the
1399  * client_buffer_end_callback on this sub-buffer.
1400  *
1401  * The only remaining threads could be the ones with pending commits. They will
1402  * have to do the deliver themselves.  Not concurrency safe in overwrite mode.
1403  * We detect corrupted subbuffers with commit and reserve counts. We keep a
1404  * corrupted sub-buffers count and push the readers across these sub-buffers.
1405  *
1406  * Not concurrency safe if a writer is stalled in a subbuffer and another writer
1407  * switches in, finding out it's corrupted.  The result will be than the old
1408  * (uncommited) subbuffer will be declared corrupted, and that the new subbuffer
1409  * will be declared corrupted too because of the commit count adjustment.
1410  */
1411 static void ltt_reserve_end_switch_current(
1412                 struct ust_channel *chan,
1413                 struct ust_buffer *buf,
1414                 struct ltt_reserve_switch_offsets *offsets, u64 *tsc)
1415 {
1416         long endidx = SUBBUF_INDEX(offsets->end - 1, chan);
1417         long commit_count, padding_size;
1418
1419         padding_size = chan->subbuf_size
1420                         - (SUBBUF_OFFSET(offsets->end - 1, chan) + 1);
1421
1422         ltt_buffer_end(buf, *tsc, offsets->end, endidx);
1423
1424         /*
1425          * Must write slot data before incrementing commit count.
1426          * This compiler barrier is upgraded into a smp_wmb() by the IPI
1427          * sent by get_subbuf() when it does its smp_rmb().
1428          */
1429         barrier();
1430         uatomic_add(&buf->commit_count[endidx].cc, padding_size);
1431         commit_count = uatomic_read(&buf->commit_count[endidx].cc);
1432         ltt_check_deliver(chan, buf,
1433                 offsets->end - 1, commit_count, endidx);
1434         ltt_write_commit_counter(chan, buf, endidx,
1435                 offsets->end, commit_count, padding_size);
1436 }
1437
1438 /*
1439  * Returns :
1440  * 0 if ok
1441  * !0 if execution must be aborted.
1442  */
1443 static int ltt_relay_try_switch_slow(
1444                 enum force_switch_mode mode,
1445                 struct ust_channel *chan,
1446                 struct ust_buffer *buf,
1447                 struct ltt_reserve_switch_offsets *offsets,
1448                 u64 *tsc)
1449 {
1450         long subbuf_index;
1451         long reserve_commit_diff;
1452
1453         offsets->begin = uatomic_read(&buf->offset);
1454         offsets->old = offsets->begin;
1455         offsets->begin_switch = 0;
1456         offsets->end_switch_old = 0;
1457
1458         *tsc = trace_clock_read64();
1459
1460         if (SUBBUF_OFFSET(offsets->begin, buf->chan) != 0) {
1461                 offsets->begin = SUBBUF_ALIGN(offsets->begin, buf->chan);
1462                 offsets->end_switch_old = 1;
1463         } else {
1464                 /* we do not have to switch : buffer is empty */
1465                 return -1;
1466         }
1467         if (mode == FORCE_ACTIVE)
1468                 offsets->begin += ltt_subbuffer_header_size();
1469         /*
1470          * Always begin_switch in FORCE_ACTIVE mode.
1471          * Test new buffer integrity
1472          */
1473         subbuf_index = SUBBUF_INDEX(offsets->begin, buf->chan);
1474         reserve_commit_diff =
1475                 (BUFFER_TRUNC(offsets->begin, buf->chan)
1476                  >> chan->n_subbufs_order)
1477                 - (uatomic_read(&buf->commit_count[subbuf_index].cc_sb)
1478                         & chan->commit_count_mask);
1479         if (reserve_commit_diff == 0) {
1480                 /* Next buffer not corrupted. */
1481                 if (mode == FORCE_ACTIVE
1482                     && !chan->overwrite
1483                     && offsets->begin - uatomic_read(&buf->consumed)
1484                        >= chan->alloc_size) {
1485                         /*
1486                          * We do not overwrite non consumed buffers and we are
1487                          * full : ignore switch while tracing is active.
1488                          */
1489                         return -1;
1490                 }
1491         } else {
1492                 /*
1493                  * Next subbuffer corrupted. Force pushing reader even in normal
1494                  * mode
1495                  */
1496         }
1497         offsets->end = offsets->begin;
1498         return 0;
1499 }
1500
1501 /*
1502  * Force a sub-buffer switch for a per-cpu buffer. This operation is
1503  * completely reentrant : can be called while tracing is active with
1504  * absolutely no lock held.
1505  */
1506 void ltt_force_switch_lockless_slow(struct ust_buffer *buf,
1507                 enum force_switch_mode mode)
1508 {
1509         struct ust_channel *chan = buf->chan;
1510         struct ltt_reserve_switch_offsets offsets;
1511         u64 tsc;
1512
1513         offsets.size = 0;
1514
1515         DBG("Switching (forced) %s_%d", chan->channel_name, buf->cpu);
1516         /*
1517          * Perform retryable operations.
1518          */
1519         do {
1520                 if (ltt_relay_try_switch_slow(mode, chan, buf,
1521                                 &offsets, &tsc))
1522                         return;
1523         } while (uatomic_cmpxchg(&buf->offset, offsets.old,
1524                         offsets.end) != offsets.old);
1525
1526         /*
1527          * Atomically update last_tsc. This update races against concurrent
1528          * atomic updates, but the race will always cause supplementary full TSC
1529          * events, never the opposite (missing a full TSC event when it would be
1530          * needed).
1531          */
1532         save_last_tsc(buf, tsc);
1533
1534         /*
1535          * Push the reader if necessary
1536          */
1537         if (mode == FORCE_ACTIVE) {
1538                 ltt_reserve_push_reader(chan, buf, offsets.end - 1);
1539 //ust//         ltt_clear_noref_flag(chan, buf, SUBBUF_INDEX(offsets.end - 1, chan));
1540         }
1541
1542         /*
1543          * Switch old subbuffer if needed.
1544          */
1545         if (offsets.end_switch_old) {
1546 //ust//         ltt_clear_noref_flag(rchan, buf, SUBBUF_INDEX(offsets.old - 1, rchan));
1547                 ltt_reserve_switch_old_subbuf(chan, buf, &offsets, &tsc);
1548         }
1549
1550         /*
1551          * Populate new subbuffer.
1552          */
1553         if (mode == FORCE_ACTIVE)
1554                 ltt_reserve_switch_new_subbuf(chan, buf, &offsets, &tsc);
1555 }
1556
1557 /*
1558  * Returns :
1559  * 0 if ok
1560  * !0 if execution must be aborted.
1561  */
1562 static int ltt_relay_try_reserve_slow(struct ust_channel *chan, struct ust_buffer *buf,
1563                 struct ltt_reserve_switch_offsets *offsets, size_t data_size,
1564                 u64 *tsc, unsigned int *rflags, int largest_align)
1565 {
1566         long reserve_commit_diff;
1567
1568         offsets->begin = uatomic_read(&buf->offset);
1569         offsets->old = offsets->begin;
1570         offsets->begin_switch = 0;
1571         offsets->end_switch_current = 0;
1572         offsets->end_switch_old = 0;
1573
1574         *tsc = trace_clock_read64();
1575         if (last_tsc_overflow(buf, *tsc))
1576                 *rflags = LTT_RFLAG_ID_SIZE_TSC;
1577
1578         if (unlikely(SUBBUF_OFFSET(offsets->begin, buf->chan) == 0)) {
1579                 offsets->begin_switch = 1;              /* For offsets->begin */
1580         } else {
1581                 offsets->size = ust_get_header_size(chan,
1582                                         offsets->begin, data_size,
1583                                         &offsets->before_hdr_pad, *rflags);
1584                 offsets->size += ltt_align(offsets->begin + offsets->size,
1585                                            largest_align)
1586                                  + data_size;
1587                 if (unlikely((SUBBUF_OFFSET(offsets->begin, buf->chan) +
1588                              offsets->size) > buf->chan->subbuf_size)) {
1589                         offsets->end_switch_old = 1;    /* For offsets->old */
1590                         offsets->begin_switch = 1;      /* For offsets->begin */
1591                 }
1592         }
1593         if (unlikely(offsets->begin_switch)) {
1594                 long subbuf_index;
1595
1596                 /*
1597                  * We are typically not filling the previous buffer completely.
1598                  */
1599                 if (likely(offsets->end_switch_old))
1600                         offsets->begin = SUBBUF_ALIGN(offsets->begin,
1601                                                       buf->chan);
1602                 offsets->begin = offsets->begin + ltt_subbuffer_header_size();
1603                 /* Test new buffer integrity */
1604                 subbuf_index = SUBBUF_INDEX(offsets->begin, buf->chan);
1605                 reserve_commit_diff =
1606                   (BUFFER_TRUNC(offsets->begin, buf->chan)
1607                    >> chan->n_subbufs_order)
1608                   - (uatomic_read(&buf->commit_count[subbuf_index].cc_sb)
1609                                 & chan->commit_count_mask);
1610                 if (likely(reserve_commit_diff == 0)) {
1611                         /* Next buffer not corrupted. */
1612                         if (unlikely(!chan->overwrite &&
1613                                 (SUBBUF_TRUNC(offsets->begin, buf->chan)
1614                                  - SUBBUF_TRUNC(uatomic_read(
1615                                                         &buf->consumed),
1616                                                 buf->chan))
1617                                 >= chan->alloc_size)) {
1618                                 /*
1619                                  * We do not overwrite non consumed buffers
1620                                  * and we are full : event is lost.
1621                                  */
1622                                 uatomic_inc(&buf->events_lost);
1623                                 return -1;
1624                         } else {
1625                                 /*
1626                                  * next buffer not corrupted, we are either in
1627                                  * overwrite mode or the buffer is not full.
1628                                  * It's safe to write in this new subbuffer.
1629                                  */
1630                         }
1631                 } else {
1632                         /*
1633                          * Next subbuffer corrupted. Drop event in normal and
1634                          * overwrite mode. Caused by either a writer OOPS or
1635                          * too many nested writes over a reserve/commit pair.
1636                          */
1637                         uatomic_inc(&buf->events_lost);
1638                         return -1;
1639                 }
1640                 offsets->size = ust_get_header_size(chan,
1641                                         offsets->begin, data_size,
1642                                         &offsets->before_hdr_pad, *rflags);
1643                 offsets->size += ltt_align(offsets->begin + offsets->size,
1644                                            largest_align)
1645                                  + data_size;
1646                 if (unlikely((SUBBUF_OFFSET(offsets->begin, buf->chan)
1647                              + offsets->size) > buf->chan->subbuf_size)) {
1648                         /*
1649                          * Event too big for subbuffers, report error, don't
1650                          * complete the sub-buffer switch.
1651                          */
1652                         uatomic_inc(&buf->events_lost);
1653                         return -1;
1654                 } else {
1655                         /*
1656                          * We just made a successful buffer switch and the event
1657                          * fits in the new subbuffer. Let's write.
1658                          */
1659                 }
1660         } else {
1661                 /*
1662                  * Event fits in the current buffer and we are not on a switch
1663                  * boundary. It's safe to write.
1664                  */
1665         }
1666         offsets->end = offsets->begin + offsets->size;
1667
1668         if (unlikely((SUBBUF_OFFSET(offsets->end, buf->chan)) == 0)) {
1669                 /*
1670                  * The offset_end will fall at the very beginning of the next
1671                  * subbuffer.
1672                  */
1673                 offsets->end_switch_current = 1;        /* For offsets->begin */
1674         }
1675         return 0;
1676 }
1677
1678 /**
1679  * ltt_relay_reserve_slot_lockless_slow - Atomic slot reservation in a buffer.
1680  * @trace: the trace structure to log to.
1681  * @ltt_channel: channel structure
1682  * @transport_data: data structure specific to ltt relay
1683  * @data_size: size of the variable length data to log.
1684  * @slot_size: pointer to total size of the slot (out)
1685  * @buf_offset : pointer to reserved buffer offset (out)
1686  * @tsc: pointer to the tsc at the slot reservation (out)
1687  * @cpu: cpuid
1688  *
1689  * Return : -ENOSPC if not enough space, else returns 0.
1690  * It will take care of sub-buffer switching.
1691  */
1692 int ltt_reserve_slot_lockless_slow(struct ust_trace *trace,
1693                 struct ust_channel *chan, void **transport_data,
1694                 size_t data_size, size_t *slot_size, long *buf_offset, u64 *tsc,
1695                 unsigned int *rflags, int largest_align, int cpu)
1696 {
1697         struct ust_buffer *buf = chan->buf[cpu];
1698         struct ltt_reserve_switch_offsets offsets;
1699
1700         offsets.size = 0;
1701
1702         do {
1703                 if (unlikely(ltt_relay_try_reserve_slow(chan, buf, &offsets,
1704                                 data_size, tsc, rflags, largest_align)))
1705                         return -ENOSPC;
1706         } while (unlikely(uatomic_cmpxchg(&buf->offset, offsets.old,
1707                         offsets.end) != offsets.old));
1708
1709         /*
1710          * Atomically update last_tsc. This update races against concurrent
1711          * atomic updates, but the race will always cause supplementary full TSC
1712          * events, never the opposite (missing a full TSC event when it would be
1713          * needed).
1714          */
1715         save_last_tsc(buf, *tsc);
1716
1717         /*
1718          * Push the reader if necessary
1719          */
1720         ltt_reserve_push_reader(chan, buf, offsets.end - 1);
1721
1722         /*
1723          * Clear noref flag for this subbuffer.
1724          */
1725 //ust// ltt_clear_noref_flag(chan, buf, SUBBUF_INDEX(offsets.end - 1, chan));
1726
1727         /*
1728          * Switch old subbuffer if needed.
1729          */
1730         if (unlikely(offsets.end_switch_old)) {
1731 //ust//         ltt_clear_noref_flag(chan, buf, SUBBUF_INDEX(offsets.old - 1, chan));
1732                 ltt_reserve_switch_old_subbuf(chan, buf, &offsets, tsc);
1733                 DBG("Switching %s_%d", chan->channel_name, cpu);
1734         }
1735
1736         /*
1737          * Populate new subbuffer.
1738          */
1739         if (unlikely(offsets.begin_switch))
1740                 ltt_reserve_switch_new_subbuf(chan, buf, &offsets, tsc);
1741
1742         if (unlikely(offsets.end_switch_current))
1743                 ltt_reserve_end_switch_current(chan, buf, &offsets, tsc);
1744
1745         *slot_size = offsets.size;
1746         *buf_offset = offsets.begin + offsets.before_hdr_pad;
1747         return 0;
1748 }
1749
1750 static struct ltt_transport ust_relay_transport = {
1751         .name = "ustrelay",
1752         .ops = {
1753                 .create_channel = ust_buffers_create_channel,
1754                 .finish_channel = ltt_relay_finish_channel,
1755                 .remove_channel = ltt_relay_remove_channel,
1756                 .wakeup_channel = ltt_relay_async_wakeup_chan,
1757         },
1758 };
1759
1760 static char initialized = 0;
1761
1762 void __attribute__((constructor)) init_ustrelay_transport(void)
1763 {
1764         if(!initialized) {
1765                 ltt_transport_register(&ust_relay_transport);
1766                 initialized = 1;
1767         }
1768 }
1769
1770 static void __attribute__((destructor)) ust_buffers_exit(void)
1771 {
1772         ltt_transport_unregister(&ust_relay_transport);
1773 }
1774
1775 size_t ltt_write_event_header_slow(struct ust_trace *trace,
1776                 struct ust_channel *channel,
1777                 struct ust_buffer *buf, long buf_offset,
1778                 u16 eID, u32 event_size,
1779                 u64 tsc, unsigned int rflags)
1780 {
1781         struct ltt_event_header header;
1782         u16 small_size;
1783
1784         switch (rflags) {
1785         case LTT_RFLAG_ID_SIZE_TSC:
1786                 header.id_time = 29 << LTT_TSC_BITS;
1787                 break;
1788         case LTT_RFLAG_ID_SIZE:
1789                 header.id_time = 30 << LTT_TSC_BITS;
1790                 break;
1791         case LTT_RFLAG_ID:
1792                 header.id_time = 31 << LTT_TSC_BITS;
1793                 break;
1794         }
1795
1796         header.id_time |= (u32)tsc & LTT_TSC_MASK;
1797         ust_buffers_write(buf, buf_offset, &header, sizeof(header));
1798         buf_offset += sizeof(header);
1799
1800         switch (rflags) {
1801         case LTT_RFLAG_ID_SIZE_TSC:
1802                 small_size = (u16)min_t(u32, event_size, LTT_MAX_SMALL_SIZE);
1803                 ust_buffers_write(buf, buf_offset,
1804                         &eID, sizeof(u16));
1805                 buf_offset += sizeof(u16);
1806                 ust_buffers_write(buf, buf_offset,
1807                         &small_size, sizeof(u16));
1808                 buf_offset += sizeof(u16);
1809                 if (small_size == LTT_MAX_SMALL_SIZE) {
1810                         ust_buffers_write(buf, buf_offset,
1811                                 &event_size, sizeof(u32));
1812                         buf_offset += sizeof(u32);
1813                 }
1814                 buf_offset += ltt_align(buf_offset, sizeof(u64));
1815                 ust_buffers_write(buf, buf_offset,
1816                         &tsc, sizeof(u64));
1817                 buf_offset += sizeof(u64);
1818                 break;
1819         case LTT_RFLAG_ID_SIZE:
1820                 small_size = (u16)min_t(u32, event_size, LTT_MAX_SMALL_SIZE);
1821                 ust_buffers_write(buf, buf_offset,
1822                         &eID, sizeof(u16));
1823                 buf_offset += sizeof(u16);
1824                 ust_buffers_write(buf, buf_offset,
1825                         &small_size, sizeof(u16));
1826                 buf_offset += sizeof(u16);
1827                 if (small_size == LTT_MAX_SMALL_SIZE) {
1828                         ust_buffers_write(buf, buf_offset,
1829                                 &event_size, sizeof(u32));
1830                         buf_offset += sizeof(u32);
1831                 }
1832                 break;
1833         case LTT_RFLAG_ID:
1834                 ust_buffers_write(buf, buf_offset,
1835                         &eID, sizeof(u16));
1836                 buf_offset += sizeof(u16);
1837                 break;
1838         }
1839
1840         return buf_offset;
1841 }