/*
 * Copyright (C) 2008,2009 - Mathieu Desnoyers (mathieu.desnoyers@polymtl.ca)
 *
 * Dual LGPL v2.1/GPL v2 license.
 */
#include <linux/errno.h>
#include <linux/stddef.h>
#include <linux/slab.h>
#include <linux/module.h>
#include <linux/string.h>
#include <linux/vmalloc.h>
#include <linux/cpu.h>
#include <linux/bitops.h>
#include <linux/delay.h>

#include "ltt-relay.h"
#include "ltt-tracer.h"
#include "ltt-relay-lockless.h" /* for cpu hotplug */
/**
 * ltt_chanbuf_allocate - allocate a channel buffer
 * @buf: the buffer struct
 * @size: total size of the buffer
 * @n_sb: number of subbuffers
 * @extra_reader_sb: need extra subbuffer for reader
 */
int ltt_chanbuf_allocate(struct ltt_chanbuf_alloc *buf, size_t size,
                         size_t n_sb, int extra_reader_sb)
{
        long i, j, n_pages, n_pages_per_sb, page_idx = 0;
        struct page **pages;
        void **virt;

        n_pages = size >> PAGE_SHIFT;
        n_pages_per_sb = n_pages >> get_count_order(n_sb);
        if (extra_reader_sb)
                n_pages += n_pages_per_sb;      /* Add pages for reader */
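        /*
         * Sizing sketch (example values): with size = 64kB, n_sb = 4 and
         * PAGE_SIZE = 4kB, n_pages = 16 and n_pages_per_sb = 4; requesting
         * an extra reader subbuffer raises n_pages to 20.
         */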
        pages = kmalloc_node(max_t(size_t, sizeof(*pages) * n_pages,
                                   1 << INTERNODE_CACHE_SHIFT),
                             GFP_KERNEL, cpu_to_node(buf->cpu));
        if (unlikely(!pages))
                goto pages_error;

        virt = kmalloc_node(ALIGN(sizeof(*virt) * n_pages,
                                  1 << INTERNODE_CACHE_SHIFT),
                            GFP_KERNEL, cpu_to_node(buf->cpu));
        if (unlikely(!virt))
                goto virt_error;
        for (i = 0; i < n_pages; i++) {
                pages[i] = alloc_pages_node(cpu_to_node(buf->cpu),
                                            GFP_KERNEL | __GFP_ZERO, 0);
                if (unlikely(!pages[i]))
                        goto depopulate;
                virt[i] = page_address(pages[i]);
        }
        buf->nr_pages = n_pages;
        /* Keep the backing arrays so the buffer can be freed later. */
        buf->_pages = pages;
        buf->_virt = virt;
        /* Allocate write-side page index */
        buf->buf_wsb = kzalloc_node(max_t(size_t,
                                          sizeof(struct chanbuf_sb) * n_sb,
                                          1 << INTERNODE_CACHE_SHIFT),
                                    GFP_KERNEL, cpu_to_node(buf->cpu));
        if (unlikely(!buf->buf_wsb))
                goto depopulate;

        for (i = 0; i < n_sb; i++) {
                buf->buf_wsb[i].pages =
                        kzalloc_node(max_t(size_t,
                                           sizeof(struct chanbuf_page) *
                                           n_pages_per_sb,
                                           1 << INTERNODE_CACHE_SHIFT),
                                     GFP_KERNEL, cpu_to_node(buf->cpu));
                if (!buf->buf_wsb[i].pages)
                        goto free_buf_wsb;
        }
        if (extra_reader_sb) {
                /* Allocate read-side page index */
                buf->buf_rsb.pages =
                        kzalloc_node(max_t(size_t,
                                           sizeof(struct chanbuf_page) *
                                           n_pages_per_sb,
                                           1 << INTERNODE_CACHE_SHIFT),
                                     GFP_KERNEL, cpu_to_node(buf->cpu));
                if (unlikely(!buf->buf_rsb.pages))
                        goto free_buf_wsb;
        } else {
                buf->buf_rsb.pages = buf->buf_wsb[0].pages;
        }
        /* Assign pages to write-side page index */
        for (i = 0; i < n_sb; i++) {
                for (j = 0; j < n_pages_per_sb; j++) {
                        WARN_ON(page_idx > n_pages);
                        buf->buf_wsb[i].pages[j].virt = virt[page_idx];
                        buf->buf_wsb[i].pages[j].page = pages[page_idx];
                        page_idx++;
                }
                RCHAN_SB_SET_NOREF(buf->buf_wsb[i].pages);
        }

        if (extra_reader_sb) {
                /* Assign pages to read-side page index */
                for (j = 0; j < n_pages_per_sb; j++) {
                        WARN_ON(page_idx > n_pages);
                        buf->buf_rsb.pages[j].virt = virt[page_idx];
                        buf->buf_rsb.pages[j].page = pages[page_idx];
                        page_idx++;
                }
                RCHAN_SB_SET_NOREF(buf->buf_rsb.pages);
        }
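        /*
         * Note: the NOREF tag set just above is checked by every accessor
         * below (WARN_ON_ONCE(RCHAN_SB_IS_NOREF(...))) and is cleared again
         * in the teardown paths before the page index arrays are kfree'd.
         */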
        /*
         * If kmalloc ever uses vmalloc underneath, make sure the buffer pages
         * will not fault.
         */
        vmalloc_sync_all();
        return 0;

free_buf_wsb:
        for (i = 0; i < n_sb; i++) {
                RCHAN_SB_CLEAR_NOREF(buf->buf_wsb[i].pages);
                kfree(buf->buf_wsb[i].pages);
        }
        kfree(buf->buf_wsb);
        i = n_pages;    /* every buffer page was allocated at this point */
depopulate:
        /*
         * Free all pages from [ i - 1 down to 0 ].
         * If i = 0, don't free anything.
         */
        for (i--; i >= 0; i--)
                __free_page(pages[i]);
        kfree(virt);
virt_error:
        kfree(pages);
pages_error:
        return -ENOMEM;
}
int ltt_chanbuf_alloc_create(struct ltt_chanbuf_alloc *buf,
                             struct ltt_chan_alloc *chan, int cpu)
{
        int ret;

        buf->chan = chan;
        buf->cpu = cpu;
        ret = ltt_chanbuf_allocate(buf, chan->buf_size, chan->n_sb,
                                   chan->extra_reader_sb);
        if (ret)
                return ret;
        buf->allocated = 1;
        return 0;
}
void ltt_chanbuf_alloc_free(struct ltt_chanbuf_alloc *buf)
{
        struct ltt_chan_alloc *chan = buf->chan;
        struct page **pages = buf->_pages;
        long i;

        /* Destroy the subbuffer page indexes */
        if (chan->extra_reader_sb) {
                RCHAN_SB_CLEAR_NOREF(buf->buf_rsb.pages);
                kfree(buf->buf_rsb.pages);
        }
        for (i = 0; i < chan->n_sb; i++) {
                RCHAN_SB_CLEAR_NOREF(buf->buf_wsb[i].pages);
                kfree(buf->buf_wsb[i].pages);
        }
        kfree(buf->buf_wsb);

        /* Destroy the buffer pages */
        for (i = 0; i < buf->nr_pages; i++)
                __free_page(pages[i]);
        kfree(buf->_virt);
        kfree(buf->_pages);
        buf->allocated = 0;
}
/*
 * ltt_relay_hotcpu_callback - CPU hotplug callback
 * @nb: notifier block
 * @action: hotplug action to take
 *
 * Returns the success/failure of the operation. (%NOTIFY_OK, %NOTIFY_BAD)
 */
static
int __cpuinit ltt_relay_hotcpu_callback(struct notifier_block *nb,
                                        unsigned long action,
                                        void *hcpu)
{
        unsigned int cpu = (unsigned long)hcpu;
        struct ltt_trace *trace;
        struct ltt_chan *chan;
        struct ltt_chanbuf *buf;
        int i, ret;

        switch (action) {
        case CPU_UP_PREPARE:
        case CPU_UP_PREPARE_FROZEN:
                /*
                 * CPU hotplug lock protects trace lock from this callback.
                 */
                __list_for_each_entry_rcu(trace, &ltt_traces.head, list) {
                        for (i = 0; i < trace->nr_channels; i++) {
                                chan = &trace->channels[i];
                                buf = per_cpu_ptr(chan->a.buf, cpu);
                                ret = ltt_chanbuf_create(buf, &chan->a, cpu);
                                if (ret) {
                                        printk(KERN_ERR
                                          "ltt_relay_hotcpu_callback: cpu %d "
                                          "buffer creation failed\n", cpu);
                                        return NOTIFY_BAD;
                                }
                        }
                }
                break;
        case CPU_DEAD:
        case CPU_DEAD_FROZEN:
                /*
                 * No need to do a buffer switch here, because it will happen
                 * when tracing is stopped, or will be done by switch timer
                 * CPU DEAD callback.
                 */
                break;
        }
        return NOTIFY_OK;
}
/*
 * Must be called with either trace lock or rcu read lock sched held.
 */
void ltt_chan_for_each_channel(void (*cb) (struct ltt_chanbuf *buf), int cpu)
{
        struct ltt_trace *trace;
        struct ltt_chan *chan;
        struct ltt_chanbuf *buf;
        int i;

        __list_for_each_entry_rcu(trace, &ltt_traces.head, list) {
                for (i = 0; i < trace->nr_channels; i++) {
                        chan = &trace->channels[i];
                        buf = per_cpu_ptr(chan->a.buf, cpu);
                        cb(buf);
                }
        }
}
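/*
 * Usage sketch (hypothetical callback, not part of this file): run an
 * operation on every per-cpu buffer of every channel of every active trace,
 * for the current CPU.
 *
 *      static void show_buf(struct ltt_chanbuf *buf)
 *      {
 *              printk(KERN_DEBUG "ltt buffer %p\n", buf);
 *      }
 *
 *      rcu_read_lock_sched();
 *      ltt_chan_for_each_channel(show_buf, smp_processor_id());
 *      rcu_read_unlock_sched();
 *
 * The caller must hold the trace lock or rcu read lock sched, as noted above.
 */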
/**
 * ltt_chan_alloc_init - create a new relay channel
 * @chan: the channel to initialize
 * @trace: the trace the channel belongs to
 * @base_filename: base name of files to create
 * @parent: dentry of parent directory, %NULL for root directory
 * @sb_size: size of sub-buffers (> PAGE_SIZE, power of 2)
 * @n_sb: number of sub-buffers (power of 2)
 * @extra_reader_sb: allocate an extra subbuffer for the reader
 * @overwrite: channel is in overwrite mode
 *
 * Returns 0 if successful, negative error value otherwise.
 *
 * Creates per-cpu channel buffers using the sizes and attributes
 * specified. The created channel buffer files will be named
 * base_filename_0...base_filename_N-1. File permissions will
 * be %S_IRUSR.
 */
int ltt_chan_alloc_init(struct ltt_chan_alloc *chan, struct ltt_trace *trace,
                        const char *base_filename,
                        struct dentry *parent, size_t sb_size,
                        size_t n_sb, int extra_reader_sb, int overwrite)
{
        unsigned int i;
        int ret;

        if (!(sb_size && n_sb))
                return -EPERM;

        /* Check that the subbuffer size is larger than a page. */
        WARN_ON_ONCE(sb_size < PAGE_SIZE);

        /*
         * Make sure the number of subbuffers and the subbuffer size are
         * powers of 2.
         */
        WARN_ON_ONCE(hweight32(sb_size) != 1);
        WARN_ON(hweight32(n_sb) != 1);

        chan->trace = trace;
        chan->buf_size = n_sb * sb_size;
        chan->sb_size = sb_size;
        chan->sb_size_order = get_count_order(sb_size);
        chan->n_sb_order = get_count_order(n_sb);
        chan->extra_reader_sb = extra_reader_sb;
        chan->n_sb = n_sb;
        chan->parent = parent;
        strlcpy(chan->filename, base_filename, NAME_MAX);
        kref_init(&chan->kref);
        kref_get(&chan->trace->kref);

        /* Allocating the child structure */
        chan->buf = alloc_percpu(struct ltt_chanbuf);
        if (!chan->buf)
                goto free_chan;

        for_each_online_cpu(i) {
                ret = ltt_chanbuf_create(per_cpu_ptr(chan->buf, i), chan, i);
                if (ret)
                        goto free_bufs;
        }

        return 0;

free_bufs:
        for_each_possible_cpu(i) {
                struct ltt_chanbuf *buf = per_cpu_ptr(chan->buf, i);

                if (!buf->a.allocated)
                        continue;
                ltt_chanbuf_remove_file(buf);
                ltt_chanbuf_free(buf);
        }
        free_percpu(chan->buf);
free_chan:
        kref_put(&chan->kref, ltt_chan_free);
        return -ENOMEM;
}
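/*
 * Parameter sketch (example values): sb_size = 8 * PAGE_SIZE and n_sb = 4
 * pass the power-of-two and minimum-size checks above and yield a 32-page
 * buffer per cpu, plus one extra 8-page subbuffer per cpu when
 * extra_reader_sb is set.
 */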
/**
 * ltt_chan_alloc_remove_files - remove channel files.
 * @chan: the channel
 *
 * Remove all channel files and wait for dentry use counts to become zero.
 */
void ltt_chan_alloc_remove_files(struct ltt_chan_alloc *chan)
{
        unsigned int i;
        struct dentry *dentry;

        for_each_possible_cpu(i) {
                struct ltt_chanbuf *buf = per_cpu_ptr(chan->buf, i);

                if (!buf->a.allocated)
                        continue;
                dentry = dget(buf->a.dentry);
                ltt_chanbuf_remove_file(buf);
                /* TODO: wait / wakeup instead */
                /*
                 * Wait for every reference to the dentry to be gone,
                 * except us.
                 */
                while (atomic_read(&dentry->d_count) != 1)
                        msleep(100);
                dput(dentry);
        }
}
/**
 * ltt_chan_alloc_free - destroy the channel
 * @chan: the channel
 *
 * Destroys all channel buffers and frees the channel.
 */
void ltt_chan_alloc_free(struct ltt_chan_alloc *chan)
{
        unsigned int i;

        for_each_possible_cpu(i) {
                struct ltt_chanbuf *buf = per_cpu_ptr(chan->buf, i);

                if (!buf->a.allocated)
                        continue;
                ltt_chanbuf_free(buf);
        }
        free_percpu(chan->buf);
        kref_put(&chan->trace->kref, ltt_release_trace);
        wake_up_interruptible(&chan->trace->kref_wq);
}
/**
 * _ltt_relay_write - write data to a ltt_relay buffer.
 * @bufa : buffer
 * @offset : offset within the buffer
 * @src : source address
 * @len : length to write
 * @pagecpy : page size copied so far
 */
void _ltt_relay_write(struct ltt_chanbuf_alloc *bufa, size_t offset,
                      const void *src, size_t len, ssize_t pagecpy)
{
        struct ltt_chan_alloc *chana = bufa->chan;
        size_t sbidx, index;
        struct chanbuf_page *rpages;

        do {
                len -= pagecpy;
                src += pagecpy;
                offset += pagecpy;
                sbidx = offset >> chana->sb_size_order;
                index = (offset & (chana->sb_size - 1)) >> PAGE_SHIFT;

                /*
                 * Underlying layer should never ask for writes across
                 * subbuffers.
                 */
                WARN_ON(offset >= chana->buf_size);

                pagecpy = min_t(size_t, len,
                                PAGE_SIZE - (offset & ~PAGE_MASK));
                rpages = bufa->buf_wsb[sbidx].pages;
                WARN_ON_ONCE(RCHAN_SB_IS_NOREF(rpages));
                ltt_relay_do_copy(rpages[index].virt + (offset & ~PAGE_MASK),
                                  src, pagecpy);
        } while (unlikely(len != pagecpy));
}
EXPORT_SYMBOL_GPL(_ltt_relay_write);
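/*
 * Note on the loop above: @pagecpy is the number of bytes the caller already
 * copied for the current page; each iteration skips past it, recomputes the
 * subbuffer and page indexes, and copies at most up to the next page
 * boundary, so a write crossing page boundaries completes one page at a time
 * until len == pagecpy.
 */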
/**
 * _ltt_relay_strncpy_fixup - Fix an incomplete string in a ltt_relay buffer.
 * @bufa : buffer
 * @offset : offset within the buffer
 * @len : length to write
 * @copied: string actually copied
 * @terminated: does string end with \0
 *
 * Fills string with "X" if incomplete.
 */
void _ltt_relay_strncpy_fixup(struct ltt_chanbuf_alloc *bufa, size_t offset,
                              size_t len, size_t copied, int terminated)
{
        struct ltt_chan_alloc *chana = bufa->chan;
        size_t sbidx, index;
        ssize_t pagecpy;
        struct chanbuf_page *rpages;

        if (copied == len) {
                /*
                 * Deal with non-terminated string.
                 */
                WARN_ON_ONCE(terminated);
                offset += copied - 1;
                sbidx = offset >> chana->sb_size_order;
                index = (offset & (chana->sb_size - 1)) >> PAGE_SHIFT;
                /*
                 * Underlying layer should never ask for writes across
                 * subbuffers.
                 */
                WARN_ON(offset >= chana->buf_size);
                rpages = bufa->buf_wsb[sbidx].pages;
                WARN_ON_ONCE(RCHAN_SB_IS_NOREF(rpages));
                ltt_relay_do_memset(rpages[index].virt + (offset & ~PAGE_MASK),
                                    '\0', 1);
                return;
        }

        /*
         * Deal with incomplete string.
         * Overwrite string's \0 with X too.
         */
        pagecpy = copied - 1;
        do {
                WARN_ON_ONCE(!terminated);
                len -= pagecpy;
                offset += pagecpy;
                sbidx = offset >> chana->sb_size_order;
                index = (offset & (chana->sb_size - 1)) >> PAGE_SHIFT;

                /*
                 * Underlying layer should never ask for writes across
                 * subbuffers.
                 */
                WARN_ON(offset >= chana->buf_size);

                pagecpy = min_t(size_t, len,
                                PAGE_SIZE - (offset & ~PAGE_MASK));
                rpages = bufa->buf_wsb[sbidx].pages;
                WARN_ON_ONCE(RCHAN_SB_IS_NOREF(rpages));
                ltt_relay_do_memset(rpages[index].virt + (offset & ~PAGE_MASK),
                                    'X', pagecpy);
        } while (unlikely(len != pagecpy));

        /*
         * Overwrite last 'X' with '\0'.
         */
        offset += pagecpy - 1;
        sbidx = offset >> chana->sb_size_order;
        index = (offset & (chana->sb_size - 1)) >> PAGE_SHIFT;
        /*
         * Underlying layer should never ask for writes across subbuffers.
         */
        WARN_ON(offset >= chana->buf_size);
        rpages = bufa->buf_wsb[sbidx].pages;
        WARN_ON_ONCE(RCHAN_SB_IS_NOREF(rpages));
        ltt_relay_do_memset(rpages[index].virt + (offset & ~PAGE_MASK),
                            '\0', 1);
}
EXPORT_SYMBOL_GPL(_ltt_relay_strncpy_fixup);
/**
 * _ltt_relay_strncpy - copy a string to a ltt_relay buffer.
 * @bufa : buffer
 * @offset : offset within the buffer
 * @src : source address
 * @len : length to write
 * @pagecpy : page size copied so far
 */
void _ltt_relay_strncpy(struct ltt_chanbuf_alloc *bufa, size_t offset,
                        const void *src, size_t len, ssize_t pagecpy)
{
        struct ltt_chan_alloc *chana = bufa->chan;
        size_t sbidx, index, copied;
        struct chanbuf_page *rpages;
        int terminated;

        do {
                len -= pagecpy;
                src += pagecpy;
                offset += pagecpy;
                sbidx = offset >> chana->sb_size_order;
                index = (offset & (chana->sb_size - 1)) >> PAGE_SHIFT;

                /*
                 * Underlying layer should never ask for writes across
                 * subbuffers.
                 */
                WARN_ON(offset >= chana->buf_size);

                pagecpy = min_t(size_t, len,
                                PAGE_SIZE - (offset & ~PAGE_MASK));
                rpages = bufa->buf_wsb[sbidx].pages;
                WARN_ON_ONCE(RCHAN_SB_IS_NOREF(rpages));
                copied = ltt_relay_do_strncpy(rpages[index].virt
                                              + (offset & ~PAGE_MASK),
                                              src, pagecpy, &terminated);
                if (copied < pagecpy || ((len == pagecpy) && !terminated)) {
                        _ltt_relay_strncpy_fixup(bufa, offset, len, copied,
                                                 terminated);
                        break;
                }
        } while (unlikely(len != pagecpy));
}
EXPORT_SYMBOL_GPL(_ltt_relay_strncpy);
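/*
 * The loop above stops early when ltt_relay_do_strncpy copies less than a
 * full chunk (source '\0' reached) or when the last chunk ends without a
 * terminator; in both cases _ltt_relay_strncpy_fixup pads the rest of the
 * reserved space with 'X' and terminates it, as described in its header
 * comment.
 */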
/**
 * ltt_relay_read - read data from ltt_relay_buffer.
 * @bufa : buffer
 * @offset : offset within the buffer
 * @dest : destination address
 * @len : length to read
 *
 * Should be protected by get_subbuf/put_subbuf.
 */
int ltt_relay_read(struct ltt_chanbuf_alloc *bufa, size_t offset, void *dest,
                   size_t len)
{
        struct ltt_chan_alloc *chana = bufa->chan;
        size_t index;
        ssize_t pagecpy, orig_len;
        struct chanbuf_page *rpages;

        orig_len = len;
        offset &= chana->buf_size - 1;
        index = (offset & (chana->sb_size - 1)) >> PAGE_SHIFT;
        if (unlikely(!len))
                return 0;
        for (;;) {
                pagecpy = min_t(size_t, len,
                                PAGE_SIZE - (offset & ~PAGE_MASK));
                rpages = bufa->buf_rsb.pages;
                WARN_ON_ONCE(RCHAN_SB_IS_NOREF(rpages));
                memcpy(dest, rpages[index].virt + (offset & ~PAGE_MASK),
                       pagecpy);
                len -= pagecpy;
                if (likely(!len))
                        break;
                dest += pagecpy;
                offset += pagecpy;
                index = (offset & (chana->sb_size - 1)) >> PAGE_SHIFT;
                /*
                 * Underlying layer should never ask for reads across
                 * subbuffers.
                 */
                WARN_ON(offset >= chana->buf_size);
        }
        return orig_len;
}
EXPORT_SYMBOL_GPL(ltt_relay_read);
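/*
 * Usage sketch (hypothetical destination buffer; the get_subbuf/put_subbuf
 * pairing mentioned above comes from the lockless layer):
 *
 *      char dst[64];
 *
 *      ltt_relay_read(bufa, offset, dst, sizeof(dst));
 *
 * Reads always resolve through buf_rsb, the reader-side page index.
 */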
/**
 * ltt_relay_read_cstr - read a C-style string from ltt_relay_buffer.
 * @bufa : buffer
 * @offset : offset within the buffer
 * @dest : destination address
 * @len : destination's length
 *
 * Returns the string's length.
 * Should be protected by get_subbuf/put_subbuf.
 */
int ltt_relay_read_cstr(struct ltt_chanbuf_alloc *bufa, size_t offset,
                        void *dest, size_t len)
{
        struct ltt_chan_alloc *chana = bufa->chan;
        size_t index;
        ssize_t pagecpy, pagelen, strpagelen, orig_offset;
        char *str;
        struct chanbuf_page *rpages;

        offset &= chana->buf_size - 1;
        index = (offset & (chana->sb_size - 1)) >> PAGE_SHIFT;
        orig_offset = offset;
        for (;;) {
                rpages = bufa->buf_rsb.pages;
                WARN_ON_ONCE(RCHAN_SB_IS_NOREF(rpages));
                str = (char *)rpages[index].virt + (offset & ~PAGE_MASK);
                pagelen = PAGE_SIZE - (offset & ~PAGE_MASK);
                strpagelen = strnlen(str, pagelen);
                if (len) {
                        pagecpy = min_t(size_t, len, strpagelen);
                        if (dest) {
                                memcpy(dest, str, pagecpy);
                                dest += pagecpy;
                        }
                        len -= pagecpy;
                }
                offset += strpagelen;
                index = (offset & (chana->sb_size - 1)) >> PAGE_SHIFT;
                if (strpagelen < pagelen)
                        break;
                /*
                 * Underlying layer should never ask for reads across
                 * subbuffers.
                 */
                WARN_ON(offset >= chana->buf_size);
        }
        if (dest && len)
                ((char *)dest)[0] = 0;
        return offset - orig_offset;
}
EXPORT_SYMBOL_GPL(ltt_relay_read_cstr);
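/*
 * Note: the return value is the number of buffer bytes the string spans
 * (offset - orig_offset), which can exceed what was copied when @dest is
 * smaller than the string; at most @len bytes are copied and, when room
 * remains, @dest is NUL-terminated.
 */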
/**
 * ltt_relay_read_get_page - Get a whole page to read from
 * @bufa : buffer
 * @offset : offset within the buffer
 *
 * Should be protected by get_subbuf/put_subbuf.
 */
struct page *ltt_relay_read_get_page(struct ltt_chanbuf_alloc *bufa,
                                     size_t offset)
{
        size_t index;
        struct chanbuf_page *rpages;
        struct ltt_chan_alloc *chana = bufa->chan;

        offset &= chana->buf_size - 1;
        index = (offset & (chana->sb_size - 1)) >> PAGE_SHIFT;
        rpages = bufa->buf_rsb.pages;
        WARN_ON_ONCE(RCHAN_SB_IS_NOREF(rpages));
        return rpages[index].page;
}
EXPORT_SYMBOL_GPL(ltt_relay_read_get_page);
/**
 * ltt_relay_read_offset_address - get address of a location within the buffer
 * @bufa : buffer
 * @offset : offset within the buffer.
 *
 * Return the address where a given offset is located (for read).
 * Should be used to get the current subbuffer header pointer. Given we know
 * it's never on a page boundary, it's safe to write directly to this address,
 * as long as the write is never bigger than a page size.
 */
void *ltt_relay_read_offset_address(struct ltt_chanbuf_alloc *bufa,
                                    size_t offset)
{
        size_t index;
        struct chanbuf_page *rpages;
        struct ltt_chan_alloc *chana = bufa->chan;

        offset &= chana->buf_size - 1;
        index = (offset & (chana->sb_size - 1)) >> PAGE_SHIFT;
        rpages = bufa->buf_rsb.pages;
        WARN_ON_ONCE(RCHAN_SB_IS_NOREF(rpages));
        return rpages[index].virt + (offset & ~PAGE_MASK);
}
EXPORT_SYMBOL_GPL(ltt_relay_read_offset_address);
/**
 * ltt_relay_offset_address - get address of a location within the buffer
 * @bufa : buffer
 * @offset : offset within the buffer.
 *
 * Return the address where a given offset is located.
 * Should be used to get the current subbuffer header pointer. Given we know
 * it's never on a page boundary, it's safe to write directly to this address,
 * as long as the write is never bigger than a page size.
 */
void *ltt_relay_offset_address(struct ltt_chanbuf_alloc *bufa, size_t offset)
{
        size_t sbidx, index;
        struct chanbuf_page *rpages;
        struct ltt_chan_alloc *chana = bufa->chan;

        offset &= chana->buf_size - 1;
        sbidx = offset >> chana->sb_size_order;
        index = (offset & (chana->sb_size - 1)) >> PAGE_SHIFT;
        rpages = bufa->buf_wsb[sbidx].pages;
        WARN_ON_ONCE(RCHAN_SB_IS_NOREF(rpages));
        return rpages[index].virt + (offset & ~PAGE_MASK);
}
EXPORT_SYMBOL_GPL(ltt_relay_offset_address);
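/*
 * Note: ltt_relay_read_offset_address() resolves through the read-side page
 * index (buf_rsb), while ltt_relay_offset_address() resolves through the
 * write-side page index (buf_wsb) for the subbuffer containing the offset;
 * both return a kernel virtual address that stays within a single page.
 */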
static __init int ltt_relay_alloc_init(void)
{
        hotcpu_notifier(ltt_relay_hotcpu_callback, 5);
        return 0;
}

static void __exit ltt_relay_alloc_exit(void)
{
}

module_init(ltt_relay_alloc_init);
module_exit(ltt_relay_alloc_exit);