local_set(&bpage->commit, 0);
}
+static __always_inline unsigned int rb_page_commit(struct buffer_page *bpage)
+{
+ return local_read(&bpage->page->commit);
+}
+
static void free_buffer_page(struct buffer_page *bpage)
{
free_page((unsigned long)bpage->page);
*cnt = rb_time_cnt(top);
- /* If top and bottom counts don't match, this interrupted a write */
- if (*cnt != rb_time_cnt(bottom))
+ /* If top and msb counts don't match, this interrupted a write */
+ if (*cnt != rb_time_cnt(msb))
return false;
/* The shift to msb will lose its cnt bits */
unsigned long cnt2, top2, bottom2, msb2;
u64 val;
+ /* Any interruptions in this function should cause a failure */
+ cnt = local_read(&t->cnt);
+
/* The cmpxchg always fails if it interrupted an update */
if (!__rb_time_read(t, &val, &cnt2))
return false;
if (val != expect)
return false;
- cnt = local_read(&t->cnt);
if ((cnt & 3) != cnt2)
return false;
cnt2 = cnt + 1;
rb_time_split(val, &top, &bottom, &msb);
+ msb = rb_time_val_cnt(msb, cnt);
top = rb_time_val_cnt(top, cnt);
bottom = rb_time_val_cnt(bottom, cnt);
rb_time_split(set, &top2, &bottom2, &msb2);
+ msb2 = rb_time_val_cnt(msb2, cnt);
top2 = rb_time_val_cnt(top2, cnt2);
bottom2 = rb_time_val_cnt(bottom2, cnt2);
if (full) {
poll_wait(filp, &work->full_waiters, poll_table);
work->full_waiters_pending = true;
+ if (!cpu_buffer->shortest_full ||
+ cpu_buffer->shortest_full > full)
+ cpu_buffer->shortest_full = full;
} else {
poll_wait(filp, &work->waiters, poll_table);
work->waiters_pending = true;
free_buffer_page(bpage);
}
+ free_page((unsigned long)cpu_buffer->free_page);
+
kfree(cpu_buffer);
}
* Increment overrun to account for the lost events.
*/
local_add(page_entries, &cpu_buffer->overrun);
- local_sub(BUF_PAGE_SIZE, &cpu_buffer->entries_bytes);
+ local_sub(rb_page_commit(to_remove_page), &cpu_buffer->entries_bytes);
local_inc(&cpu_buffer->pages_lost);
}
err = -ENOMEM;
goto out_err;
}
+
+ cond_resched();
}
cpus_read_lock();
cpu_buffer->reader_page->read);
}
-static __always_inline unsigned rb_page_commit(struct buffer_page *bpage)
-{
- return local_read(&bpage->page->commit);
-}
-
static struct ring_buffer_event *
rb_iter_head_event(struct ring_buffer_iter *iter)
{
*/
commit = rb_page_commit(iter_head_page);
smp_rmb();
+
+ /* An event needs to be at least 8 bytes in size */
+ if (iter->head > commit - 8)
+ goto reset;
+
event = __rb_page_index(iter_head_page, iter->head);
length = rb_event_length(event);
*/
barrier();
- if ((iter->head + length) > commit || length > BUF_MAX_DATA_SIZE)
+ if ((iter->head + length) > commit || length > BUF_PAGE_SIZE)
/* Writer corrupted the read? */
goto reset;
* the counters.
*/
local_add(entries, &cpu_buffer->overrun);
- local_sub(BUF_PAGE_SIZE, &cpu_buffer->entries_bytes);
+ local_sub(rb_page_commit(next_page), &cpu_buffer->entries_bytes);
local_inc(&cpu_buffer->pages_lost);
/*
event = __rb_page_index(tail_page, tail);
- /* account for padding bytes */
- local_add(BUF_PAGE_SIZE - tail, &cpu_buffer->entries_bytes);
-
/*
* Save the original length to the meta data.
* This will be used by the reader to add lost event
* write counter enough to allow another writer to slip
* in on this page.
* We put in a discarded commit instead, to make sure
- * that this space is not used again.
+ * that this space is not used again, and this space will
+ * not be accounted into 'entries_bytes'.
*
* If we are less than the minimum size, we don't need to
* worry about it.
/* time delta must be non zero */
event->time_delta = 1;
+ /* account for padding bytes */
+ local_add(BUF_PAGE_SIZE - tail, &cpu_buffer->entries_bytes);
+
/* Make sure the padding is visible before the tail_page->write update */
smp_wmb();
local_read(&bpage->write) & ~RB_WRITE_MASK;
unsigned long event_length = rb_event_length(event);
+ /*
+ * For the before_stamp to be different than the write_stamp
+ * to make sure that the next event adds an absolute
+ * value and does not rely on the saved write stamp, which
+ * is now going to be bogus.
+ */
+ rb_time_set(&cpu_buffer->before_stamp, 0);
+
/* Something came in, can't discard */
if (!rb_time_cmpxchg(&cpu_buffer->write_stamp,
write_stamp, write_stamp - delta))
return false;
/*
- * It's possible that the event time delta is zero
- * (has the same time stamp as the previous event)
- * in which case write_stamp and before_stamp could
- * be the same. In such a case, force before_stamp
- * to be different than write_stamp. It doesn't
- * matter what it is, as long as its different.
- */
- if (!delta)
- rb_time_set(&cpu_buffer->before_stamp, 0);
-
- /*
* If an event were to come in now, it would see that the
* write_stamp and the before_stamp are different, and assume
* that this event just added itself before updating
* absolute timestamp.
* Don't bother if this is the start of a new page (w == 0).
*/
- if (unlikely(!a_ok || !b_ok || (info->before != info->after && w))) {
+ if (!w) {
+ /* Use the sub-buffer timestamp */
+ info->delta = 0;
+ } else if (unlikely(!a_ok || !b_ok || info->before != info->after)) {
info->add_timestamp |= RB_ADD_STAMP_FORCE | RB_ADD_STAMP_EXTEND;
info->length += RB_LEN_TIME_EXTEND;
} else {
/* See if we shot pass the end of this buffer page */
if (unlikely(write > BUF_PAGE_SIZE)) {
- /* before and after may now different, fix it up*/
- b_ok = rb_time_read(&cpu_buffer->before_stamp, &info->before);
- a_ok = rb_time_read(&cpu_buffer->write_stamp, &info->after);
- if (a_ok && b_ok && info->before != info->after)
- (void)rb_time_cmpxchg(&cpu_buffer->before_stamp,
- info->before, info->after);
- if (a_ok && b_ok)
- check_buffer(cpu_buffer, info, CHECK_FULL_PAGE);
+ check_buffer(cpu_buffer, info, CHECK_FULL_PAGE);
return rb_move_tail(cpu_buffer, tail, info);
}
if (likely(tail == w)) {
- u64 save_before;
- bool s_ok;
-
/* Nothing interrupted us between A and C */
/*D*/ rb_time_set(&cpu_buffer->write_stamp, info->ts);
- barrier();
- /*E*/ s_ok = rb_time_read(&cpu_buffer->before_stamp, &save_before);
- RB_WARN_ON(cpu_buffer, !s_ok);
+ /*
+ * If something came in between C and D, the write stamp
+ * may now not be in sync. But that's fine as the before_stamp
+ * will be different and then next event will just be forced
+ * to use an absolute timestamp.
+ */
if (likely(!(info->add_timestamp &
(RB_ADD_STAMP_FORCE | RB_ADD_STAMP_ABSOLUTE))))
/* This did not interrupt any time update */
else
/* Just use full timestamp for interrupting event */
info->delta = info->ts;
- barrier();
check_buffer(cpu_buffer, info, tail);
- if (unlikely(info->ts != save_before)) {
- /* SLOW PATH - Interrupted between C and E */
-
- a_ok = rb_time_read(&cpu_buffer->write_stamp, &info->after);
- RB_WARN_ON(cpu_buffer, !a_ok);
-
- /* Write stamp must only go forward */
- if (save_before > info->after) {
- /*
- * We do not care about the result, only that
- * it gets updated atomically.
- */
- (void)rb_time_cmpxchg(&cpu_buffer->write_stamp,
- info->after, save_before);
- }
- }
} else {
u64 ts;
/* SLOW PATH - Interrupted between A and C */
if (ring_buffer_time_stamp_abs(cpu_buffer->buffer)) {
add_ts_default = RB_ADD_STAMP_ABSOLUTE;
info.length += RB_LEN_TIME_EXTEND;
+ if (info.length > BUF_MAX_DATA_SIZE)
+ goto out_fail;
} else {
add_ts_default = RB_ADD_STAMP_NONE;
}
EXPORT_SYMBOL_GPL(ring_buffer_oldest_event_ts);
/**
- * ring_buffer_bytes_cpu - get the number of bytes consumed in a cpu buffer
+ * ring_buffer_bytes_cpu - get the number of bytes unconsumed in a cpu buffer
* @buffer: The ring buffer
* @cpu: The per CPU buffer to read from.
*/
length = rb_event_length(event);
cpu_buffer->reader_page->read += length;
+ cpu_buffer->read_bytes += length;
}
static void rb_advance_iter(struct ring_buffer_iter *iter)
if (!iter)
return NULL;
- iter->event = kmalloc(BUF_MAX_DATA_SIZE, flags);
+ /* Holds the entire event: data and meta data */
+ iter->event = kmalloc(BUF_PAGE_SIZE, flags);
if (!iter->event) {
kfree(iter);
return NULL;
} else {
/* update the entry counter */
cpu_buffer->read += rb_page_entries(reader);
- cpu_buffer->read_bytes += BUF_PAGE_SIZE;
+ cpu_buffer->read_bytes += rb_page_commit(reader);
/* swap the pages */
rb_init_page(bpage);