tizen 2.4 release
[external/systemd.git] / src / journal / journal-file.c
1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3 /***
4   This file is part of systemd.
5
6   Copyright 2011 Lennart Poettering
7
8   systemd is free software; you can redistribute it and/or modify it
9   under the terms of the GNU Lesser General Public License as published by
10   the Free Software Foundation; either version 2.1 of the License, or
11   (at your option) any later version.
12
13   systemd is distributed in the hope that it will be useful, but
14   WITHOUT ANY WARRANTY; without even the implied warranty of
15   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16   Lesser General Public License for more details.
17
18   You should have received a copy of the GNU Lesser General Public License
19   along with systemd; If not, see <http://www.gnu.org/licenses/>.
20 ***/
21
22 #include <sys/mman.h>
23 #include <errno.h>
24 #include <sys/uio.h>
25 #include <unistd.h>
26 #include <sys/statvfs.h>
27 #include <fcntl.h>
28 #include <stddef.h>
29 #include <sys/xattr.h>
30
31 #include "journal-def.h"
32 #include "journal-file.h"
33 #include "journal-authenticate.h"
34 #include "lookup3.h"
35 #include "compress.h"
36 #include "fsprg.h"
37
/* Default sizes, in bytes, of the data and field hash tables */
#define DEFAULT_DATA_HASH_TABLE_SIZE (2047ULL * sizeof(HashItem))
#define DEFAULT_FIELD_HASH_TABLE_SIZE (333ULL * sizeof(HashItem))

/* Payloads smaller than this are not considered for compression */
#define COMPRESSION_SIZE_THRESHOLD (512ULL)

/* Minimum journal file size */
#if defined(CONFIG_TIZEN)
#define JOURNAL_FILE_SIZE_MIN (512ULL * 1024ULL)                     /* 512 KiB */
#else
#define JOURNAL_FILE_SIZE_MIN (4ULL * 1024ULL * 1024ULL)             /* 4 MiB */
#endif

/* Lower and upper bounds applied when the max_use value is deduced
 * from the file system size */
#define DEFAULT_MAX_USE_LOWER (1ULL * 1024ULL * 1024ULL)             /* 1 MiB */
#define DEFAULT_MAX_USE_UPPER (4ULL * 1024ULL * 1024ULL * 1024ULL)   /* 4 GiB */

/* Upper bound applied when max_size is deduced from max_use */
#define DEFAULT_MAX_SIZE_UPPER (128ULL * 1024ULL * 1024ULL)          /* 128 MiB */

/* Upper bound applied when the keep_free value is deduced from the
 * file system size */
#define DEFAULT_KEEP_FREE_UPPER (4ULL * 1024ULL * 1024ULL * 1024ULL) /* 4 GiB */

/* keep_free value used when the file system size cannot be
 * determined */
#define DEFAULT_KEEP_FREE (1024ULL * 1024ULL)                        /* 1 MiB */

/* n_data was the first field added to the header after the initial
 * file format design, hence everything before it is mandatory */
#define HEADER_SIZE_MIN ALIGN64(offsetof(Header, n_data))

/* Maximum number of entries kept in the entry array chain cache */
#define CHAIN_CACHE_MAX 20

/* Granularity by which the journal file grows each time new space
 * has to be allocated */
#if defined(CONFIG_TIZEN)
#define FILE_SIZE_INCREASE (JOURNAL_FILE_SIZE_MIN)
#else
#define FILE_SIZE_INCREASE (8ULL * 1024ULL * 1024ULL)                /* 8 MiB */
#endif
78
79 static int journal_file_set_online(JournalFile *f) {
80         assert(f);
81
82         if (!f->writable)
83                 return -EPERM;
84
85         if (!(f->fd >= 0 && f->header))
86                 return -EINVAL;
87
88         switch(f->header->state) {
89                 case STATE_ONLINE:
90                         return 0;
91
92                 case STATE_OFFLINE:
93                         f->header->state = STATE_ONLINE;
94                         fsync(f->fd);
95                         return 0;
96
97                 default:
98                         return -EINVAL;
99         }
100 }
101
102 int journal_file_set_offline(JournalFile *f) {
103         assert(f);
104
105         if (!f->writable)
106                 return -EPERM;
107
108         if (!(f->fd >= 0 && f->header))
109                 return -EINVAL;
110
111         if (f->header->state != STATE_ONLINE)
112                 return 0;
113
114         fsync(f->fd);
115
116         f->header->state = STATE_OFFLINE;
117
118         fsync(f->fd);
119
120         return 0;
121 }
122
123 void journal_file_close(JournalFile *f) {
124         assert(f);
125
126 #ifdef HAVE_GCRYPT
127         /* Write the final tag */
128         if (f->seal && f->writable)
129                 journal_file_append_tag(f);
130 #endif
131
132         /* Sync everything to disk, before we mark the file offline */
133         if (f->mmap && f->fd >= 0)
134                 mmap_cache_close_fd(f->mmap, f->fd);
135
136         journal_file_set_offline(f);
137
138         if (f->header)
139                 munmap(f->header, PAGE_ALIGN(sizeof(Header)));
140
141         safe_close(f->fd);
142         free(f->path);
143
144         if (f->mmap)
145                 mmap_cache_unref(f->mmap);
146
147         hashmap_free_free(f->chain_cache);
148
149 #if defined(HAVE_XZ) || defined(HAVE_LZ4)
150         free(f->compress_buffer);
151 #endif
152
153 #ifdef HAVE_GCRYPT
154         if (f->fss_file)
155                 munmap(f->fss_file, PAGE_ALIGN(f->fss_file_size));
156         else if (f->fsprg_state)
157                 free(f->fsprg_state);
158
159         free(f->fsprg_seed);
160
161         if (f->hmac)
162                 gcry_md_close(f->hmac);
163 #endif
164
165         free(f);
166 }
167
168 static int journal_file_init_header(JournalFile *f, JournalFile *template) {
169         Header h = {};
170         ssize_t k;
171         int r;
172
173         assert(f);
174
175         memcpy(h.signature, HEADER_SIGNATURE, 8);
176         h.header_size = htole64(ALIGN64(sizeof(h)));
177
178         h.incompatible_flags |= htole32(
179                 f->compress_xz * HEADER_INCOMPATIBLE_COMPRESSED_XZ |
180                 f->compress_lz4 * HEADER_INCOMPATIBLE_COMPRESSED_LZ4);
181
182         h.compatible_flags = htole32(
183                 f->seal * HEADER_COMPATIBLE_SEALED);
184
185         r = sd_id128_randomize(&h.file_id);
186         if (r < 0)
187                 return r;
188
189         if (template) {
190                 h.seqnum_id = template->header->seqnum_id;
191                 h.tail_entry_seqnum = template->header->tail_entry_seqnum;
192         } else
193                 h.seqnum_id = h.file_id;
194
195         k = pwrite(f->fd, &h, sizeof(h), 0);
196         if (k < 0)
197                 return -errno;
198
199         if (k != sizeof(h))
200                 return -EIO;
201
202         return 0;
203 }
204
205 static int journal_file_refresh_header(JournalFile *f) {
206         int r;
207         sd_id128_t boot_id;
208
209         assert(f);
210
211         r = sd_id128_get_machine(&f->header->machine_id);
212         if (r < 0)
213                 return r;
214
215         r = sd_id128_get_boot(&boot_id);
216         if (r < 0)
217                 return r;
218
219         if (sd_id128_equal(boot_id, f->header->boot_id))
220                 f->tail_entry_monotonic_valid = true;
221
222         f->header->boot_id = boot_id;
223
224         journal_file_set_online(f);
225
226         /* Sync the online state to disk */
227         fsync(f->fd);
228
229         return 0;
230 }
231
232 static int journal_file_verify_header(JournalFile *f) {
233         uint32_t flags;
234
235         assert(f);
236
237         if (memcmp(f->header->signature, HEADER_SIGNATURE, 8))
238                 return -EBADMSG;
239
240         /* In both read and write mode we refuse to open files with
241          * incompatible flags we don't know */
242         flags = le32toh(f->header->incompatible_flags);
243         if (flags & ~HEADER_INCOMPATIBLE_SUPPORTED) {
244                 if (flags & ~HEADER_INCOMPATIBLE_ANY)
245                         log_debug("Journal file %s has unknown incompatible flags %"PRIx32,
246                                   f->path, flags & ~HEADER_INCOMPATIBLE_ANY);
247                 flags = (flags & HEADER_INCOMPATIBLE_ANY) & ~HEADER_INCOMPATIBLE_SUPPORTED;
248                 if (flags)
249                         log_debug("Journal file %s uses incompatible flags %"PRIx32
250                                   " disabled at compilation time.", f->path, flags);
251                 return -EPROTONOSUPPORT;
252         }
253
254         /* When open for writing we refuse to open files with
255          * compatible flags, too */
256         flags = le32toh(f->header->compatible_flags);
257         if (f->writable && (flags & ~HEADER_COMPATIBLE_SUPPORTED)) {
258                 if (flags & ~HEADER_COMPATIBLE_ANY)
259                         log_debug("Journal file %s has unknown compatible flags %"PRIx32,
260                                   f->path, flags & ~HEADER_COMPATIBLE_ANY);
261                 flags = (flags & HEADER_COMPATIBLE_ANY) & ~HEADER_COMPATIBLE_SUPPORTED;
262                 if (flags)
263                         log_debug("Journal file %s uses compatible flags %"PRIx32
264                                   " disabled at compilation time.", f->path, flags);
265                 return -EPROTONOSUPPORT;
266         }
267
268         if (f->header->state >= _STATE_MAX)
269                 return -EBADMSG;
270
271         /* The first addition was n_data, so check that we are at least this large */
272         if (le64toh(f->header->header_size) < HEADER_SIZE_MIN)
273                 return -EBADMSG;
274
275         if (JOURNAL_HEADER_SEALED(f->header) && !JOURNAL_HEADER_CONTAINS(f->header, n_entry_arrays))
276                 return -EBADMSG;
277
278         if ((le64toh(f->header->header_size) + le64toh(f->header->arena_size)) > (uint64_t) f->last_stat.st_size)
279                 return -ENODATA;
280
281         if (le64toh(f->header->tail_object_offset) > (le64toh(f->header->header_size) + le64toh(f->header->arena_size)))
282                 return -ENODATA;
283
284         if (!VALID64(le64toh(f->header->data_hash_table_offset)) ||
285             !VALID64(le64toh(f->header->field_hash_table_offset)) ||
286             !VALID64(le64toh(f->header->tail_object_offset)) ||
287             !VALID64(le64toh(f->header->entry_array_offset)))
288                 return -ENODATA;
289
290         if (f->writable) {
291                 uint8_t state;
292                 sd_id128_t machine_id;
293                 int r;
294
295                 r = sd_id128_get_machine(&machine_id);
296                 if (r < 0)
297                         return r;
298
299                 if (!sd_id128_equal(machine_id, f->header->machine_id))
300                         return -EHOSTDOWN;
301
302                 state = f->header->state;
303
304                 if (state == STATE_ONLINE) {
305                         log_debug("Journal file %s is already online. Assuming unclean closing.", f->path);
306                         return -EBUSY;
307                 } else if (state == STATE_ARCHIVED)
308                         return -ESHUTDOWN;
309                 else if (state != STATE_OFFLINE) {
310                         log_debug("Journal file %s has unknown state %u.", f->path, state);
311                         return -EBUSY;
312                 }
313         }
314
315         f->compress_xz = JOURNAL_HEADER_COMPRESSED_XZ(f->header);
316         f->compress_lz4 = JOURNAL_HEADER_COMPRESSED_LZ4(f->header);
317
318         f->seal = JOURNAL_HEADER_SEALED(f->header);
319
320         return 0;
321 }
322
323 static int journal_file_allocate(JournalFile *f, uint64_t offset, uint64_t size) {
324         uint64_t old_size, new_size;
325         int r;
326
327         assert(f);
328
329         /* We assume that this file is not sparse, and we know that
330          * for sure, since we always call posix_fallocate()
331          * ourselves */
332
333         old_size =
334                 le64toh(f->header->header_size) +
335                 le64toh(f->header->arena_size);
336
337         new_size = PAGE_ALIGN(offset + size);
338         if (new_size < le64toh(f->header->header_size))
339                 new_size = le64toh(f->header->header_size);
340
341         if (new_size <= old_size)
342                 return 0;
343
344         if (f->metrics.max_size > 0 && new_size > f->metrics.max_size)
345                 return -E2BIG;
346
347         if (new_size > f->metrics.min_size && f->metrics.keep_free > 0) {
348                 struct statvfs svfs;
349
350                 if (fstatvfs(f->fd, &svfs) >= 0) {
351                         uint64_t available;
352
353                         available = svfs.f_bfree * svfs.f_bsize;
354
355                         if (available >= f->metrics.keep_free)
356                                 available -= f->metrics.keep_free;
357                         else
358                                 available = 0;
359
360                         if (new_size - old_size > available)
361                                 return -E2BIG;
362                 }
363         }
364
365         /* Increase by larger blocks at once */
366         new_size = ((new_size+FILE_SIZE_INCREASE-1) / FILE_SIZE_INCREASE) * FILE_SIZE_INCREASE;
367         if (f->metrics.max_size > 0 && new_size > f->metrics.max_size)
368                 new_size = f->metrics.max_size;
369
370         /* Note that the glibc fallocate() fallback is very
371            inefficient, hence we try to minimize the allocation area
372            as we can. */
373         r = posix_fallocate(f->fd, old_size, new_size - old_size);
374         if (r != 0)
375                 return -r;
376
377         if (fstat(f->fd, &f->last_stat) < 0)
378                 return -errno;
379
380         f->header->arena_size = htole64(new_size - le64toh(f->header->header_size));
381
382         return 0;
383 }
384
385 static int journal_file_move_to(JournalFile *f, int context, bool keep_always, uint64_t offset, uint64_t size, void **ret) {
386         assert(f);
387         assert(ret);
388
389         if (size <= 0)
390                 return -EINVAL;
391
392         /* Avoid SIGBUS on invalid accesses */
393         if (offset + size > (uint64_t) f->last_stat.st_size) {
394                 /* Hmm, out of range? Let's refresh the fstat() data
395                  * first, before we trust that check. */
396
397                 if (fstat(f->fd, &f->last_stat) < 0 ||
398                     offset + size > (uint64_t) f->last_stat.st_size)
399                         return -EADDRNOTAVAIL;
400         }
401
402         return mmap_cache_get(f->mmap, f->fd, f->prot, context, keep_always, offset, size, &f->last_stat, ret);
403 }
404
405 static uint64_t minimum_header_size(Object *o) {
406
407         static const uint64_t table[] = {
408                 [OBJECT_DATA] = sizeof(DataObject),
409                 [OBJECT_FIELD] = sizeof(FieldObject),
410                 [OBJECT_ENTRY] = sizeof(EntryObject),
411                 [OBJECT_DATA_HASH_TABLE] = sizeof(HashTableObject),
412                 [OBJECT_FIELD_HASH_TABLE] = sizeof(HashTableObject),
413                 [OBJECT_ENTRY_ARRAY] = sizeof(EntryArrayObject),
414                 [OBJECT_TAG] = sizeof(TagObject),
415         };
416
417         if (o->object.type >= ELEMENTSOF(table) || table[o->object.type] <= 0)
418                 return sizeof(ObjectHeader);
419
420         return table[o->object.type];
421 }
422
423 int journal_file_move_to_object(JournalFile *f, int type, uint64_t offset, Object **ret) {
424         int r;
425         void *t;
426         Object *o;
427         uint64_t s;
428
429         assert(f);
430         assert(ret);
431
432         /* Objects may only be located at multiple of 64 bit */
433         if (!VALID64(offset))
434                 return -EFAULT;
435
436
437         r = journal_file_move_to(f, type_to_context(type), false, offset, sizeof(ObjectHeader), &t);
438         if (r < 0)
439                 return r;
440
441         o = (Object*) t;
442         s = le64toh(o->object.size);
443
444         if (s < sizeof(ObjectHeader))
445                 return -EBADMSG;
446
447         if (o->object.type <= OBJECT_UNUSED)
448                 return -EBADMSG;
449
450         if (s < minimum_header_size(o))
451                 return -EBADMSG;
452
453         if (type > 0 && o->object.type != type)
454                 return -EBADMSG;
455
456         if (s > sizeof(ObjectHeader)) {
457                 r = journal_file_move_to(f, o->object.type, false, offset, s, &t);
458                 if (r < 0)
459                         return r;
460
461                 o = (Object*) t;
462         }
463
464         *ret = o;
465         return 0;
466 }
467
468 static uint64_t journal_file_entry_seqnum(JournalFile *f, uint64_t *seqnum) {
469         uint64_t r;
470
471         assert(f);
472
473         r = le64toh(f->header->tail_entry_seqnum) + 1;
474
475         if (seqnum) {
476                 /* If an external seqnum counter was passed, we update
477                  * both the local and the external one, and set it to
478                  * the maximum of both */
479
480                 if (*seqnum + 1 > r)
481                         r = *seqnum + 1;
482
483                 *seqnum = r;
484         }
485
486         f->header->tail_entry_seqnum = htole64(r);
487
488         if (f->header->head_entry_seqnum == 0)
489                 f->header->head_entry_seqnum = htole64(r);
490
491         return r;
492 }
493
494 int journal_file_append_object(JournalFile *f, int type, uint64_t size, Object **ret, uint64_t *offset) {
495         int r;
496         uint64_t p;
497         Object *tail, *o;
498         void *t;
499
500         assert(f);
501         assert(type > 0 && type < _OBJECT_TYPE_MAX);
502         assert(size >= sizeof(ObjectHeader));
503         assert(offset);
504         assert(ret);
505
506         r = journal_file_set_online(f);
507         if (r < 0)
508                 return r;
509
510         p = le64toh(f->header->tail_object_offset);
511         if (p == 0)
512                 p = le64toh(f->header->header_size);
513         else {
514                 r = journal_file_move_to_object(f, -1, p, &tail);
515                 if (r < 0)
516                         return r;
517
518                 p += ALIGN64(le64toh(tail->object.size));
519         }
520
521         r = journal_file_allocate(f, p, size);
522         if (r < 0)
523                 return r;
524
525         r = journal_file_move_to(f, type, false, p, size, &t);
526         if (r < 0)
527                 return r;
528
529         o = (Object*) t;
530
531         zero(o->object);
532         o->object.type = type;
533         o->object.size = htole64(size);
534
535         f->header->tail_object_offset = htole64(p);
536         f->header->n_objects = htole64(le64toh(f->header->n_objects) + 1);
537
538         *ret = o;
539         *offset = p;
540
541         return 0;
542 }
543
544 static int journal_file_setup_data_hash_table(JournalFile *f) {
545         uint64_t s, p;
546         Object *o;
547         int r;
548
549         assert(f);
550
551         /* We estimate that we need 1 hash table entry per 768 of
552            journal file and we want to make sure we never get beyond
553            75% fill level. Calculate the hash table size for the
554            maximum file size based on these metrics. */
555
556         s = (f->metrics.max_size * 4 / 768 / 3) * sizeof(HashItem);
557         if (s < DEFAULT_DATA_HASH_TABLE_SIZE)
558                 s = DEFAULT_DATA_HASH_TABLE_SIZE;
559
560         log_debug("Reserving %"PRIu64" entries in hash table.", s / sizeof(HashItem));
561
562         r = journal_file_append_object(f,
563                                        OBJECT_DATA_HASH_TABLE,
564                                        offsetof(Object, hash_table.items) + s,
565                                        &o, &p);
566         if (r < 0)
567                 return r;
568
569         memzero(o->hash_table.items, s);
570
571         f->header->data_hash_table_offset = htole64(p + offsetof(Object, hash_table.items));
572         f->header->data_hash_table_size = htole64(s);
573
574         return 0;
575 }
576
577 static int journal_file_setup_field_hash_table(JournalFile *f) {
578         uint64_t s, p;
579         Object *o;
580         int r;
581
582         assert(f);
583
584         /* We use a fixed size hash table for the fields as this
585          * number should grow very slowly only */
586
587         s = DEFAULT_FIELD_HASH_TABLE_SIZE;
588         r = journal_file_append_object(f,
589                                        OBJECT_FIELD_HASH_TABLE,
590                                        offsetof(Object, hash_table.items) + s,
591                                        &o, &p);
592         if (r < 0)
593                 return r;
594
595         memzero(o->hash_table.items, s);
596
597         f->header->field_hash_table_offset = htole64(p + offsetof(Object, hash_table.items));
598         f->header->field_hash_table_size = htole64(s);
599
600         return 0;
601 }
602
603 static int journal_file_map_data_hash_table(JournalFile *f) {
604         uint64_t s, p;
605         void *t;
606         int r;
607
608         assert(f);
609
610         p = le64toh(f->header->data_hash_table_offset);
611         s = le64toh(f->header->data_hash_table_size);
612
613         r = journal_file_move_to(f,
614                                  OBJECT_DATA_HASH_TABLE,
615                                  true,
616                                  p, s,
617                                  &t);
618         if (r < 0)
619                 return r;
620
621         f->data_hash_table = t;
622         return 0;
623 }
624
625 static int journal_file_map_field_hash_table(JournalFile *f) {
626         uint64_t s, p;
627         void *t;
628         int r;
629
630         assert(f);
631
632         p = le64toh(f->header->field_hash_table_offset);
633         s = le64toh(f->header->field_hash_table_size);
634
635         r = journal_file_move_to(f,
636                                  OBJECT_FIELD_HASH_TABLE,
637                                  true,
638                                  p, s,
639                                  &t);
640         if (r < 0)
641                 return r;
642
643         f->field_hash_table = t;
644         return 0;
645 }
646
647 static int journal_file_link_field(
648                 JournalFile *f,
649                 Object *o,
650                 uint64_t offset,
651                 uint64_t hash) {
652
653         uint64_t p, h;
654         int r;
655
656         assert(f);
657         assert(o);
658         assert(offset > 0);
659
660         if (o->object.type != OBJECT_FIELD)
661                 return -EINVAL;
662
663         /* This might alter the window we are looking at */
664
665         o->field.next_hash_offset = o->field.head_data_offset = 0;
666
667         h = hash % (le64toh(f->header->field_hash_table_size) / sizeof(HashItem));
668         p = le64toh(f->field_hash_table[h].tail_hash_offset);
669         if (p == 0)
670                 f->field_hash_table[h].head_hash_offset = htole64(offset);
671         else {
672                 r = journal_file_move_to_object(f, OBJECT_FIELD, p, &o);
673                 if (r < 0)
674                         return r;
675
676                 o->field.next_hash_offset = htole64(offset);
677         }
678
679         f->field_hash_table[h].tail_hash_offset = htole64(offset);
680
681         if (JOURNAL_HEADER_CONTAINS(f->header, n_fields))
682                 f->header->n_fields = htole64(le64toh(f->header->n_fields) + 1);
683
684         return 0;
685 }
686
687 static int journal_file_link_data(
688                 JournalFile *f,
689                 Object *o,
690                 uint64_t offset,
691                 uint64_t hash) {
692
693         uint64_t p, h;
694         int r;
695
696         assert(f);
697         assert(o);
698         assert(offset > 0);
699
700         if (o->object.type != OBJECT_DATA)
701                 return -EINVAL;
702
703         /* This might alter the window we are looking at */
704
705         o->data.next_hash_offset = o->data.next_field_offset = 0;
706         o->data.entry_offset = o->data.entry_array_offset = 0;
707         o->data.n_entries = 0;
708
709         h = hash % (le64toh(f->header->data_hash_table_size) / sizeof(HashItem));
710         p = le64toh(f->data_hash_table[h].tail_hash_offset);
711         if (p == 0)
712                 /* Only entry in the hash table is easy */
713                 f->data_hash_table[h].head_hash_offset = htole64(offset);
714         else {
715                 /* Move back to the previous data object, to patch in
716                  * pointer */
717
718                 r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
719                 if (r < 0)
720                         return r;
721
722                 o->data.next_hash_offset = htole64(offset);
723         }
724
725         f->data_hash_table[h].tail_hash_offset = htole64(offset);
726
727         if (JOURNAL_HEADER_CONTAINS(f->header, n_data))
728                 f->header->n_data = htole64(le64toh(f->header->n_data) + 1);
729
730         return 0;
731 }
732
733 int journal_file_find_field_object_with_hash(
734                 JournalFile *f,
735                 const void *field, uint64_t size, uint64_t hash,
736                 Object **ret, uint64_t *offset) {
737
738         uint64_t p, osize, h;
739         int r;
740
741         assert(f);
742         assert(field && size > 0);
743
744         osize = offsetof(Object, field.payload) + size;
745
746         if (f->header->field_hash_table_size == 0)
747                 return -EBADMSG;
748
749         h = hash % (le64toh(f->header->field_hash_table_size) / sizeof(HashItem));
750         p = le64toh(f->field_hash_table[h].head_hash_offset);
751
752         while (p > 0) {
753                 Object *o;
754
755                 r = journal_file_move_to_object(f, OBJECT_FIELD, p, &o);
756                 if (r < 0)
757                         return r;
758
759                 if (le64toh(o->field.hash) == hash &&
760                     le64toh(o->object.size) == osize &&
761                     memcmp(o->field.payload, field, size) == 0) {
762
763                         if (ret)
764                                 *ret = o;
765                         if (offset)
766                                 *offset = p;
767
768                         return 1;
769                 }
770
771                 p = le64toh(o->field.next_hash_offset);
772         }
773
774         return 0;
775 }
776
777 int journal_file_find_field_object(
778                 JournalFile *f,
779                 const void *field, uint64_t size,
780                 Object **ret, uint64_t *offset) {
781
782         uint64_t hash;
783
784         assert(f);
785         assert(field && size > 0);
786
787         hash = hash64(field, size);
788
789         return journal_file_find_field_object_with_hash(f,
790                                                         field, size, hash,
791                                                         ret, offset);
792 }
793
794 int journal_file_find_data_object_with_hash(
795                 JournalFile *f,
796                 const void *data, uint64_t size, uint64_t hash,
797                 Object **ret, uint64_t *offset) {
798
799         uint64_t p, osize, h;
800         int r;
801
802         assert(f);
803         assert(data || size == 0);
804
805         osize = offsetof(Object, data.payload) + size;
806
807         if (f->header->data_hash_table_size == 0)
808                 return -EBADMSG;
809
810         h = hash % (le64toh(f->header->data_hash_table_size) / sizeof(HashItem));
811         p = le64toh(f->data_hash_table[h].head_hash_offset);
812
813         while (p > 0) {
814                 Object *o;
815
816                 r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
817                 if (r < 0)
818                         return r;
819
820                 if (le64toh(o->data.hash) != hash)
821                         goto next;
822
823                 if (o->object.flags & OBJECT_COMPRESSION_MASK) {
824 #if defined(HAVE_XZ) || defined(HAVE_LZ4)
825                         uint64_t l;
826                         size_t rsize;
827
828                         l = le64toh(o->object.size);
829                         if (l <= offsetof(Object, data.payload))
830                                 return -EBADMSG;
831
832                         l -= offsetof(Object, data.payload);
833
834                         r = decompress_blob(o->object.flags & OBJECT_COMPRESSION_MASK,
835                                             o->data.payload, l, &f->compress_buffer, &f->compress_buffer_size, &rsize, 0);
836                         if (r < 0)
837                                 return r;
838
839                         if (rsize == size &&
840                             memcmp(f->compress_buffer, data, size) == 0) {
841
842                                 if (ret)
843                                         *ret = o;
844
845                                 if (offset)
846                                         *offset = p;
847
848                                 return 1;
849                         }
850 #else
851                         return -EPROTONOSUPPORT;
852 #endif
853                 } else if (le64toh(o->object.size) == osize &&
854                            memcmp(o->data.payload, data, size) == 0) {
855
856                         if (ret)
857                                 *ret = o;
858
859                         if (offset)
860                                 *offset = p;
861
862                         return 1;
863                 }
864
865         next:
866                 p = le64toh(o->data.next_hash_offset);
867         }
868
869         return 0;
870 }
871
872 int journal_file_find_data_object(
873                 JournalFile *f,
874                 const void *data, uint64_t size,
875                 Object **ret, uint64_t *offset) {
876
877         uint64_t hash;
878
879         assert(f);
880         assert(data || size == 0);
881
882         hash = hash64(data, size);
883
884         return journal_file_find_data_object_with_hash(f,
885                                                        data, size, hash,
886                                                        ret, offset);
887 }
888
889 static int journal_file_append_field(
890                 JournalFile *f,
891                 const void *field, uint64_t size,
892                 Object **ret, uint64_t *offset) {
893
894         uint64_t hash, p;
895         uint64_t osize;
896         Object *o;
897         int r;
898
899         assert(f);
900         assert(field && size > 0);
901
902         hash = hash64(field, size);
903
904         r = journal_file_find_field_object_with_hash(f, field, size, hash, &o, &p);
905         if (r < 0)
906                 return r;
907         else if (r > 0) {
908
909                 if (ret)
910                         *ret = o;
911
912                 if (offset)
913                         *offset = p;
914
915                 return 0;
916         }
917
918         osize = offsetof(Object, field.payload) + size;
919         r = journal_file_append_object(f, OBJECT_FIELD, osize, &o, &p);
920         if (r < 0)
921                 return r;
922
923         o->field.hash = htole64(hash);
924         memcpy(o->field.payload, field, size);
925
926         r = journal_file_link_field(f, o, p, hash);
927         if (r < 0)
928                 return r;
929
930         /* The linking might have altered the window, so let's
931          * refresh our pointer */
932         r = journal_file_move_to_object(f, OBJECT_FIELD, p, &o);
933         if (r < 0)
934                 return r;
935
936 #ifdef HAVE_GCRYPT
937         r = journal_file_hmac_put_object(f, OBJECT_FIELD, o, p);
938         if (r < 0)
939                 return r;
940 #endif
941
942         if (ret)
943                 *ret = o;
944
945         if (offset)
946                 *offset = p;
947
948         return 0;
949 }
950
951 static int journal_file_append_data(
952                 JournalFile *f,
953                 const void *data, uint64_t size,
954                 Object **ret, uint64_t *offset) {
955
956         uint64_t hash, p;
957         uint64_t osize;
958         Object *o;
959         int r, compression = 0;
960         const void *eq;
961
962         assert(f);
963         assert(data || size == 0);
964
965         hash = hash64(data, size);
966
967         r = journal_file_find_data_object_with_hash(f, data, size, hash, &o, &p);
968         if (r < 0)
969                 return r;
970         else if (r > 0) {
971
972                 if (ret)
973                         *ret = o;
974
975                 if (offset)
976                         *offset = p;
977
978                 return 0;
979         }
980
981         osize = offsetof(Object, data.payload) + size;
982         r = journal_file_append_object(f, OBJECT_DATA, osize, &o, &p);
983         if (r < 0)
984                 return r;
985
986         o->data.hash = htole64(hash);
987
988 #if defined(HAVE_XZ) || defined(HAVE_LZ4)
989         if (f->compress_xz &&
990             size >= COMPRESSION_SIZE_THRESHOLD) {
991                 size_t rsize;
992
993                 compression = compress_blob(data, size, o->data.payload, &rsize);
994
995                 if (compression) {
996                         o->object.size = htole64(offsetof(Object, data.payload) + rsize);
997                         o->object.flags |= compression;
998
999                         log_debug("Compressed data object %"PRIu64" -> %zu using %s",
1000                                   size, rsize, object_compressed_to_string(compression));
1001                 }
1002         }
1003 #endif
1004
1005         if (!compression && size > 0)
1006                 memcpy(o->data.payload, data, size);
1007
1008         r = journal_file_link_data(f, o, p, hash);
1009         if (r < 0)
1010                 return r;
1011
1012         /* The linking might have altered the window, so let's
1013          * refresh our pointer */
1014         r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
1015         if (r < 0)
1016                 return r;
1017
1018         if (!data)
1019                 eq = NULL;
1020         else
1021                 eq = memchr(data, '=', size);
1022         if (eq && eq > data) {
1023                 Object *fo = NULL;
1024                 uint64_t fp;
1025
1026                 /* Create field object ... */
1027                 r = journal_file_append_field(f, data, (uint8_t*) eq - (uint8_t*) data, &fo, &fp);
1028                 if (r < 0)
1029                         return r;
1030
1031                 /* ... and link it in. */
1032                 o->data.next_field_offset = fo->field.head_data_offset;
1033                 fo->field.head_data_offset = le64toh(p);
1034         }
1035
1036 #ifdef HAVE_GCRYPT
1037         r = journal_file_hmac_put_object(f, OBJECT_DATA, o, p);
1038         if (r < 0)
1039                 return r;
1040 #endif
1041
1042         if (ret)
1043                 *ret = o;
1044
1045         if (offset)
1046                 *offset = p;
1047
1048         return 0;
1049 }
1050
1051 uint64_t journal_file_entry_n_items(Object *o) {
1052         assert(o);
1053
1054         if (o->object.type != OBJECT_ENTRY)
1055                 return 0;
1056
1057         return (le64toh(o->object.size) - offsetof(Object, entry.items)) / sizeof(EntryItem);
1058 }
1059
1060 uint64_t journal_file_entry_array_n_items(Object *o) {
1061         assert(o);
1062
1063         if (o->object.type != OBJECT_ENTRY_ARRAY)
1064                 return 0;
1065
1066         return (le64toh(o->object.size) - offsetof(Object, entry_array.items)) / sizeof(uint64_t);
1067 }
1068
1069 uint64_t journal_file_hash_table_n_items(Object *o) {
1070         assert(o);
1071
1072         if (o->object.type != OBJECT_DATA_HASH_TABLE &&
1073             o->object.type != OBJECT_FIELD_HASH_TABLE)
1074                 return 0;
1075
1076         return (le64toh(o->object.size) - offsetof(Object, hash_table.items)) / sizeof(HashItem);
1077 }
1078
1079 static int link_entry_into_array(JournalFile *f,
1080                                  le64_t *first,
1081                                  le64_t *idx,
1082                                  uint64_t p) {
1083         int r;
1084         uint64_t n = 0, ap = 0, q, i, a, hidx;
1085         Object *o;
1086
1087         assert(f);
1088         assert(first);
1089         assert(idx);
1090         assert(p > 0);
1091
1092         a = le64toh(*first);
1093         i = hidx = le64toh(*idx);
1094         while (a > 0) {
1095
1096                 r = journal_file_move_to_object(f, OBJECT_ENTRY_ARRAY, a, &o);
1097                 if (r < 0)
1098                         return r;
1099
1100                 n = journal_file_entry_array_n_items(o);
1101                 if (i < n) {
1102                         o->entry_array.items[i] = htole64(p);
1103                         *idx = htole64(hidx + 1);
1104                         return 0;
1105                 }
1106
1107                 i -= n;
1108                 ap = a;
1109                 a = le64toh(o->entry_array.next_entry_array_offset);
1110         }
1111
1112         if (hidx > n)
1113                 n = (hidx+1) * 2;
1114         else
1115                 n = n * 2;
1116
1117         if (n < 4)
1118                 n = 4;
1119
1120         r = journal_file_append_object(f, OBJECT_ENTRY_ARRAY,
1121                                        offsetof(Object, entry_array.items) + n * sizeof(uint64_t),
1122                                        &o, &q);
1123         if (r < 0)
1124                 return r;
1125
1126 #ifdef HAVE_GCRYPT
1127         r = journal_file_hmac_put_object(f, OBJECT_ENTRY_ARRAY, o, q);
1128         if (r < 0)
1129                 return r;
1130 #endif
1131
1132         o->entry_array.items[i] = htole64(p);
1133
1134         if (ap == 0)
1135                 *first = htole64(q);
1136         else {
1137                 r = journal_file_move_to_object(f, OBJECT_ENTRY_ARRAY, ap, &o);
1138                 if (r < 0)
1139                         return r;
1140
1141                 o->entry_array.next_entry_array_offset = htole64(q);
1142         }
1143
1144         if (JOURNAL_HEADER_CONTAINS(f->header, n_entry_arrays))
1145                 f->header->n_entry_arrays = htole64(le64toh(f->header->n_entry_arrays) + 1);
1146
1147         *idx = htole64(hidx + 1);
1148
1149         return 0;
1150 }
1151
1152 static int link_entry_into_array_plus_one(JournalFile *f,
1153                                           le64_t *extra,
1154                                           le64_t *first,
1155                                           le64_t *idx,
1156                                           uint64_t p) {
1157
1158         int r;
1159
1160         assert(f);
1161         assert(extra);
1162         assert(first);
1163         assert(idx);
1164         assert(p > 0);
1165
1166         if (*idx == 0)
1167                 *extra = htole64(p);
1168         else {
1169                 le64_t i;
1170
1171                 i = htole64(le64toh(*idx) - 1);
1172                 r = link_entry_into_array(f, first, &i, p);
1173                 if (r < 0)
1174                         return r;
1175         }
1176
1177         *idx = htole64(le64toh(*idx) + 1);
1178         return 0;
1179 }
1180
1181 static int journal_file_link_entry_item(JournalFile *f, Object *o, uint64_t offset, uint64_t i) {
1182         uint64_t p;
1183         int r;
1184         assert(f);
1185         assert(o);
1186         assert(offset > 0);
1187
1188         p = le64toh(o->entry.items[i].object_offset);
1189         if (p == 0)
1190                 return -EINVAL;
1191
1192         r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
1193         if (r < 0)
1194                 return r;
1195
1196         return link_entry_into_array_plus_one(f,
1197                                               &o->data.entry_offset,
1198                                               &o->data.entry_array_offset,
1199                                               &o->data.n_entries,
1200                                               offset);
1201 }
1202
1203 static int journal_file_link_entry(JournalFile *f, Object *o, uint64_t offset) {
1204         uint64_t n, i;
1205         int r;
1206
1207         assert(f);
1208         assert(o);
1209         assert(offset > 0);
1210
1211         if (o->object.type != OBJECT_ENTRY)
1212                 return -EINVAL;
1213
1214         __sync_synchronize();
1215
1216         /* Link up the entry itself */
1217         r = link_entry_into_array(f,
1218                                   &f->header->entry_array_offset,
1219                                   &f->header->n_entries,
1220                                   offset);
1221         if (r < 0)
1222                 return r;
1223
1224         /* log_debug("=> %s seqnr=%"PRIu64" n_entries=%"PRIu64, f->path, o->entry.seqnum, f->header->n_entries); */
1225
1226         if (f->header->head_entry_realtime == 0)
1227                 f->header->head_entry_realtime = o->entry.realtime;
1228
1229         f->header->tail_entry_realtime = o->entry.realtime;
1230         f->header->tail_entry_monotonic = o->entry.monotonic;
1231
1232         f->tail_entry_monotonic_valid = true;
1233
1234         /* Link up the items */
1235         n = journal_file_entry_n_items(o);
1236         for (i = 0; i < n; i++) {
1237                 r = journal_file_link_entry_item(f, o, offset, i);
1238                 if (r < 0)
1239                         return r;
1240         }
1241
1242         return 0;
1243 }
1244
1245 static int journal_file_append_entry_internal(
1246                 JournalFile *f,
1247                 const dual_timestamp *ts,
1248                 uint64_t xor_hash,
1249                 const EntryItem items[], unsigned n_items,
1250                 uint64_t *seqnum,
1251                 Object **ret, uint64_t *offset) {
1252         uint64_t np;
1253         uint64_t osize;
1254         Object *o;
1255         int r;
1256
1257         assert(f);
1258         assert(items || n_items == 0);
1259         assert(ts);
1260
1261         osize = offsetof(Object, entry.items) + (n_items * sizeof(EntryItem));
1262
1263         r = journal_file_append_object(f, OBJECT_ENTRY, osize, &o, &np);
1264         if (r < 0)
1265                 return r;
1266
1267         o->entry.seqnum = htole64(journal_file_entry_seqnum(f, seqnum));
1268         memcpy(o->entry.items, items, n_items * sizeof(EntryItem));
1269         o->entry.realtime = htole64(ts->realtime);
1270         o->entry.monotonic = htole64(ts->monotonic);
1271         o->entry.xor_hash = htole64(xor_hash);
1272         o->entry.boot_id = f->header->boot_id;
1273
1274 #ifdef HAVE_GCRYPT
1275         r = journal_file_hmac_put_object(f, OBJECT_ENTRY, o, np);
1276         if (r < 0)
1277                 return r;
1278 #endif
1279
1280         r = journal_file_link_entry(f, o, np);
1281         if (r < 0)
1282                 return r;
1283
1284         if (ret)
1285                 *ret = o;
1286
1287         if (offset)
1288                 *offset = np;
1289
1290         return 0;
1291 }
1292
1293 void journal_file_post_change(JournalFile *f) {
1294         assert(f);
1295
1296         /* inotify() does not receive IN_MODIFY events from file
1297          * accesses done via mmap(). After each access we hence
1298          * trigger IN_MODIFY by truncating the journal file to its
1299          * current size which triggers IN_MODIFY. */
1300
1301         __sync_synchronize();
1302
1303         if (ftruncate(f->fd, f->last_stat.st_size) < 0)
1304                 log_error("Failed to truncate file to its own size: %m");
1305 }
1306
1307 static int entry_item_cmp(const void *_a, const void *_b) {
1308         const EntryItem *a = _a, *b = _b;
1309
1310         if (le64toh(a->object_offset) < le64toh(b->object_offset))
1311                 return -1;
1312         if (le64toh(a->object_offset) > le64toh(b->object_offset))
1313                 return 1;
1314         return 0;
1315 }
1316
1317 int journal_file_append_entry(JournalFile *f, const dual_timestamp *ts, const struct iovec iovec[], unsigned n_iovec, uint64_t *seqnum, Object **ret, uint64_t *offset) {
1318         unsigned i;
1319         EntryItem *items;
1320         int r;
1321         uint64_t xor_hash = 0;
1322         struct dual_timestamp _ts;
1323
1324         assert(f);
1325         assert(iovec || n_iovec == 0);
1326
1327         if (!ts) {
1328                 dual_timestamp_get(&_ts);
1329                 ts = &_ts;
1330         }
1331
1332         if (f->tail_entry_monotonic_valid &&
1333             ts->monotonic < le64toh(f->header->tail_entry_monotonic))
1334                 return -EINVAL;
1335
1336 #ifdef HAVE_GCRYPT
1337         r = journal_file_maybe_append_tag(f, ts->realtime);
1338         if (r < 0)
1339                 return r;
1340 #endif
1341
1342         /* alloca() can't take 0, hence let's allocate at least one */
1343         items = alloca(sizeof(EntryItem) * MAX(1u, n_iovec));
1344
1345         for (i = 0; i < n_iovec; i++) {
1346                 uint64_t p;
1347                 Object *o;
1348
1349                 r = journal_file_append_data(f, iovec[i].iov_base, iovec[i].iov_len, &o, &p);
1350                 if (r < 0)
1351                         return r;
1352
1353                 xor_hash ^= le64toh(o->data.hash);
1354                 items[i].object_offset = htole64(p);
1355                 items[i].hash = o->data.hash;
1356         }
1357
1358         /* Order by the position on disk, in order to improve seek
1359          * times for rotating media. */
1360         qsort_safe(items, n_iovec, sizeof(EntryItem), entry_item_cmp);
1361
1362         r = journal_file_append_entry_internal(f, ts, xor_hash, items, n_iovec, seqnum, ret, offset);
1363
1364         journal_file_post_change(f);
1365
1366         return r;
1367 }
1368
typedef struct ChainCacheItem ChainCacheItem;

/* Remembers how far into a chain of entry arrays a previous lookup
 * got, so that later lookups on the same chain can skip ahead. */
struct ChainCacheItem {
        /* Offset of the first array of the chain; identifies the chain */
        uint64_t first;

        /* Offset of the array that was cached */
        uint64_t array;

        /* Value of the first item stored in the cached array */
        uint64_t begin;

        /* Number of items in all arrays of the chain preceding the cached one */
        uint64_t total;

        /* Index looked at last within the cached array, used to keep
         * bisection local */
        uint64_t last_index;
};
1376
1377 static void chain_cache_put(
1378                 Hashmap *h,
1379                 ChainCacheItem *ci,
1380                 uint64_t first,
1381                 uint64_t array,
1382                 uint64_t begin,
1383                 uint64_t total,
1384                 uint64_t last_index) {
1385
1386         if (!ci) {
1387                 /* If the chain item to cache for this chain is the
1388                  * first one it's not worth caching anything */
1389                 if (array == first)
1390                         return;
1391
1392                 if (hashmap_size(h) >= CHAIN_CACHE_MAX)
1393                         ci = hashmap_steal_first(h);
1394                 else {
1395                         ci = new(ChainCacheItem, 1);
1396                         if (!ci)
1397                                 return;
1398                 }
1399
1400                 ci->first = first;
1401
1402                 if (hashmap_put(h, &ci->first, ci) < 0) {
1403                         free(ci);
1404                         return;
1405                 }
1406         } else
1407                 assert(ci->first == first);
1408
1409         ci->array = array;
1410         ci->begin = begin;
1411         ci->total = total;
1412         ci->last_index = last_index;
1413 }
1414
1415 static int generic_array_get(
1416                 JournalFile *f,
1417                 uint64_t first,
1418                 uint64_t i,
1419                 Object **ret, uint64_t *offset) {
1420
1421         Object *o;
1422         uint64_t p = 0, a, t = 0;
1423         int r;
1424         ChainCacheItem *ci;
1425
1426         assert(f);
1427
1428         a = first;
1429
1430         /* Try the chain cache first */
1431         ci = hashmap_get(f->chain_cache, &first);
1432         if (ci && i > ci->total) {
1433                 a = ci->array;
1434                 i -= ci->total;
1435                 t = ci->total;
1436         }
1437
1438         while (a > 0) {
1439                 uint64_t k;
1440
1441                 r = journal_file_move_to_object(f, OBJECT_ENTRY_ARRAY, a, &o);
1442                 if (r < 0)
1443                         return r;
1444
1445                 k = journal_file_entry_array_n_items(o);
1446                 if (i < k) {
1447                         p = le64toh(o->entry_array.items[i]);
1448                         goto found;
1449                 }
1450
1451                 i -= k;
1452                 t += k;
1453                 a = le64toh(o->entry_array.next_entry_array_offset);
1454         }
1455
1456         return 0;
1457
1458 found:
1459         /* Let's cache this item for the next invocation */
1460         chain_cache_put(f->chain_cache, ci, first, a, le64toh(o->entry_array.items[0]), t, i);
1461
1462         r = journal_file_move_to_object(f, OBJECT_ENTRY, p, &o);
1463         if (r < 0)
1464                 return r;
1465
1466         if (ret)
1467                 *ret = o;
1468
1469         if (offset)
1470                 *offset = p;
1471
1472         return 1;
1473 }
1474
1475 static int generic_array_get_plus_one(
1476                 JournalFile *f,
1477                 uint64_t extra,
1478                 uint64_t first,
1479                 uint64_t i,
1480                 Object **ret, uint64_t *offset) {
1481
1482         Object *o;
1483
1484         assert(f);
1485
1486         if (i == 0) {
1487                 int r;
1488
1489                 r = journal_file_move_to_object(f, OBJECT_ENTRY, extra, &o);
1490                 if (r < 0)
1491                         return r;
1492
1493                 if (ret)
1494                         *ret = o;
1495
1496                 if (offset)
1497                         *offset = extra;
1498
1499                 return 1;
1500         }
1501
1502         return generic_array_get(f, first, i-1, ret, offset);
1503 }
1504
/* Results of the test_object callbacks used for bisection */
enum {
        /* The tested object matches the needle */
        TEST_FOUND = 0,

        /* The tested object sorts before the needle */
        TEST_LEFT = 1,

        /* The tested object sorts after the needle */
        TEST_RIGHT = 2
};
1510
1511 static int generic_array_bisect(
1512                 JournalFile *f,
1513                 uint64_t first,
1514                 uint64_t n,
1515                 uint64_t needle,
1516                 int (*test_object)(JournalFile *f, uint64_t p, uint64_t needle),
1517                 direction_t direction,
1518                 Object **ret,
1519                 uint64_t *offset,
1520                 uint64_t *idx) {
1521
1522         uint64_t a, p, t = 0, i = 0, last_p = 0, last_index = (uint64_t) -1;
1523         bool subtract_one = false;
1524         Object *o, *array = NULL;
1525         int r;
1526         ChainCacheItem *ci;
1527
1528         assert(f);
1529         assert(test_object);
1530
1531         /* Start with the first array in the chain */
1532         a = first;
1533
1534         ci = hashmap_get(f->chain_cache, &first);
1535         if (ci && n > ci->total) {
1536                 /* Ah, we have iterated this bisection array chain
1537                  * previously! Let's see if we can skip ahead in the
1538                  * chain, as far as the last time. But we can't jump
1539                  * backwards in the chain, so let's check that
1540                  * first. */
1541
1542                 r = test_object(f, ci->begin, needle);
1543                 if (r < 0)
1544                         return r;
1545
1546                 if (r == TEST_LEFT) {
1547                         /* OK, what we are looking for is right of the
1548                          * begin of this EntryArray, so let's jump
1549                          * straight to previously cached array in the
1550                          * chain */
1551
1552                         a = ci->array;
1553                         n -= ci->total;
1554                         t = ci->total;
1555                         last_index = ci->last_index;
1556                 }
1557         }
1558
1559         while (a > 0) {
1560                 uint64_t left, right, k, lp;
1561
1562                 r = journal_file_move_to_object(f, OBJECT_ENTRY_ARRAY, a, &array);
1563                 if (r < 0)
1564                         return r;
1565
1566                 k = journal_file_entry_array_n_items(array);
1567                 right = MIN(k, n);
1568                 if (right <= 0)
1569                         return 0;
1570
1571                 i = right - 1;
1572                 lp = p = le64toh(array->entry_array.items[i]);
1573                 if (p <= 0)
1574                         return -EBADMSG;
1575
1576                 r = test_object(f, p, needle);
1577                 if (r < 0)
1578                         return r;
1579
1580                 if (r == TEST_FOUND)
1581                         r = direction == DIRECTION_DOWN ? TEST_RIGHT : TEST_LEFT;
1582
1583                 if (r == TEST_RIGHT) {
1584                         left = 0;
1585                         right -= 1;
1586
1587                         if (last_index != (uint64_t) -1) {
1588                                 assert(last_index <= right);
1589
1590                                 /* If we cached the last index we
1591                                  * looked at, let's try to not to jump
1592                                  * too wildly around and see if we can
1593                                  * limit the range to look at early to
1594                                  * the immediate neighbors of the last
1595                                  * index we looked at. */
1596
1597                                 if (last_index > 0) {
1598                                         uint64_t x = last_index - 1;
1599
1600                                         p = le64toh(array->entry_array.items[x]);
1601                                         if (p <= 0)
1602                                                 return -EBADMSG;
1603
1604                                         r = test_object(f, p, needle);
1605                                         if (r < 0)
1606                                                 return r;
1607
1608                                         if (r == TEST_FOUND)
1609                                                 r = direction == DIRECTION_DOWN ? TEST_RIGHT : TEST_LEFT;
1610
1611                                         if (r == TEST_RIGHT)
1612                                                 right = x;
1613                                         else
1614                                                 left = x + 1;
1615                                 }
1616
1617                                 if (last_index < right) {
1618                                         uint64_t y = last_index + 1;
1619
1620                                         p = le64toh(array->entry_array.items[y]);
1621                                         if (p <= 0)
1622                                                 return -EBADMSG;
1623
1624                                         r = test_object(f, p, needle);
1625                                         if (r < 0)
1626                                                 return r;
1627
1628                                         if (r == TEST_FOUND)
1629                                                 r = direction == DIRECTION_DOWN ? TEST_RIGHT : TEST_LEFT;
1630
1631                                         if (r == TEST_RIGHT)
1632                                                 right = y;
1633                                         else
1634                                                 left = y + 1;
1635                                 }
1636                         }
1637
1638                         for (;;) {
1639                                 if (left == right) {
1640                                         if (direction == DIRECTION_UP)
1641                                                 subtract_one = true;
1642
1643                                         i = left;
1644                                         goto found;
1645                                 }
1646
1647                                 assert(left < right);
1648                                 i = (left + right) / 2;
1649
1650                                 p = le64toh(array->entry_array.items[i]);
1651                                 if (p <= 0)
1652                                         return -EBADMSG;
1653
1654                                 r = test_object(f, p, needle);
1655                                 if (r < 0)
1656                                         return r;
1657
1658                                 if (r == TEST_FOUND)
1659                                         r = direction == DIRECTION_DOWN ? TEST_RIGHT : TEST_LEFT;
1660
1661                                 if (r == TEST_RIGHT)
1662                                         right = i;
1663                                 else
1664                                         left = i + 1;
1665                         }
1666                 }
1667
1668                 if (k > n) {
1669                         if (direction == DIRECTION_UP) {
1670                                 i = n;
1671                                 subtract_one = true;
1672                                 goto found;
1673                         }
1674
1675                         return 0;
1676                 }
1677
1678                 last_p = lp;
1679
1680                 n -= k;
1681                 t += k;
1682                 last_index = (uint64_t) -1;
1683                 a = le64toh(array->entry_array.next_entry_array_offset);
1684         }
1685
1686         return 0;
1687
1688 found:
1689         if (subtract_one && t == 0 && i == 0)
1690                 return 0;
1691
1692         /* Let's cache this item for the next invocation */
1693         chain_cache_put(f->chain_cache, ci, first, a, le64toh(array->entry_array.items[0]), t, subtract_one ? (i > 0 ? i-1 : (uint64_t) -1) : i);
1694
1695         if (subtract_one && i == 0)
1696                 p = last_p;
1697         else if (subtract_one)
1698                 p = le64toh(array->entry_array.items[i-1]);
1699         else
1700                 p = le64toh(array->entry_array.items[i]);
1701
1702         r = journal_file_move_to_object(f, OBJECT_ENTRY, p, &o);
1703         if (r < 0)
1704                 return r;
1705
1706         if (ret)
1707                 *ret = o;
1708
1709         if (offset)
1710                 *offset = p;
1711
1712         if (idx)
1713                 *idx = t + i + (subtract_one ? -1 : 0);
1714
1715         return 1;
1716 }
1717
1718
1719 static int generic_array_bisect_plus_one(
1720                 JournalFile *f,
1721                 uint64_t extra,
1722                 uint64_t first,
1723                 uint64_t n,
1724                 uint64_t needle,
1725                 int (*test_object)(JournalFile *f, uint64_t p, uint64_t needle),
1726                 direction_t direction,
1727                 Object **ret,
1728                 uint64_t *offset,
1729                 uint64_t *idx) {
1730
1731         int r;
1732         bool step_back = false;
1733         Object *o;
1734
1735         assert(f);
1736         assert(test_object);
1737
1738         if (n <= 0)
1739                 return 0;
1740
1741         /* This bisects the array in object 'first', but first checks
1742          * an extra  */
1743         r = test_object(f, extra, needle);
1744         if (r < 0)
1745                 return r;
1746
1747         if (r == TEST_FOUND)
1748                 r = direction == DIRECTION_DOWN ? TEST_RIGHT : TEST_LEFT;
1749
1750         /* if we are looking with DIRECTION_UP then we need to first
1751            see if in the actual array there is a matching entry, and
1752            return the last one of that. But if there isn't any we need
1753            to return this one. Hence remember this, and return it
1754            below. */
1755         if (r == TEST_LEFT)
1756                 step_back = direction == DIRECTION_UP;
1757
1758         if (r == TEST_RIGHT) {
1759                 if (direction == DIRECTION_DOWN)
1760                         goto found;
1761                 else
1762                         return 0;
1763         }
1764
1765         r = generic_array_bisect(f, first, n-1, needle, test_object, direction, ret, offset, idx);
1766
1767         if (r == 0 && step_back)
1768                 goto found;
1769
1770         if (r > 0 && idx)
1771                 (*idx) ++;
1772
1773         return r;
1774
1775 found:
1776         r = journal_file_move_to_object(f, OBJECT_ENTRY, extra, &o);
1777         if (r < 0)
1778                 return r;
1779
1780         if (ret)
1781                 *ret = o;
1782
1783         if (offset)
1784                 *offset = extra;
1785
1786         if (idx)
1787                 *idx = 0;
1788
1789         return 1;
1790 }
1791
1792 _pure_ static int test_object_offset(JournalFile *f, uint64_t p, uint64_t needle) {
1793         assert(f);
1794         assert(p > 0);
1795
1796         if (p == needle)
1797                 return TEST_FOUND;
1798         else if (p < needle)
1799                 return TEST_LEFT;
1800         else
1801                 return TEST_RIGHT;
1802 }
1803
1804 int journal_file_move_to_entry_by_offset(
1805                 JournalFile *f,
1806                 uint64_t p,
1807                 direction_t direction,
1808                 Object **ret,
1809                 uint64_t *offset) {
1810
1811         return generic_array_bisect(f,
1812                                     le64toh(f->header->entry_array_offset),
1813                                     le64toh(f->header->n_entries),
1814                                     p,
1815                                     test_object_offset,
1816                                     direction,
1817                                     ret, offset, NULL);
1818 }
1819
1820
1821 static int test_object_seqnum(JournalFile *f, uint64_t p, uint64_t needle) {
1822         Object *o;
1823         int r;
1824
1825         assert(f);
1826         assert(p > 0);
1827
1828         r = journal_file_move_to_object(f, OBJECT_ENTRY, p, &o);
1829         if (r < 0)
1830                 return r;
1831
1832         if (le64toh(o->entry.seqnum) == needle)
1833                 return TEST_FOUND;
1834         else if (le64toh(o->entry.seqnum) < needle)
1835                 return TEST_LEFT;
1836         else
1837                 return TEST_RIGHT;
1838 }
1839
1840 int journal_file_move_to_entry_by_seqnum(
1841                 JournalFile *f,
1842                 uint64_t seqnum,
1843                 direction_t direction,
1844                 Object **ret,
1845                 uint64_t *offset) {
1846
1847         return generic_array_bisect(f,
1848                                     le64toh(f->header->entry_array_offset),
1849                                     le64toh(f->header->n_entries),
1850                                     seqnum,
1851                                     test_object_seqnum,
1852                                     direction,
1853                                     ret, offset, NULL);
1854 }
1855
1856 static int test_object_realtime(JournalFile *f, uint64_t p, uint64_t needle) {
1857         Object *o;
1858         int r;
1859
1860         assert(f);
1861         assert(p > 0);
1862
1863         r = journal_file_move_to_object(f, OBJECT_ENTRY, p, &o);
1864         if (r < 0)
1865                 return r;
1866
1867         if (le64toh(o->entry.realtime) == needle)
1868                 return TEST_FOUND;
1869         else if (le64toh(o->entry.realtime) < needle)
1870                 return TEST_LEFT;
1871         else
1872                 return TEST_RIGHT;
1873 }
1874
1875 int journal_file_move_to_entry_by_realtime(
1876                 JournalFile *f,
1877                 uint64_t realtime,
1878                 direction_t direction,
1879                 Object **ret,
1880                 uint64_t *offset) {
1881
1882         return generic_array_bisect(f,
1883                                     le64toh(f->header->entry_array_offset),
1884                                     le64toh(f->header->n_entries),
1885                                     realtime,
1886                                     test_object_realtime,
1887                                     direction,
1888                                     ret, offset, NULL);
1889 }
1890
1891 static int test_object_monotonic(JournalFile *f, uint64_t p, uint64_t needle) {
1892         Object *o;
1893         int r;
1894
1895         assert(f);
1896         assert(p > 0);
1897
1898         r = journal_file_move_to_object(f, OBJECT_ENTRY, p, &o);
1899         if (r < 0)
1900                 return r;
1901
1902         if (le64toh(o->entry.monotonic) == needle)
1903                 return TEST_FOUND;
1904         else if (le64toh(o->entry.monotonic) < needle)
1905                 return TEST_LEFT;
1906         else
1907                 return TEST_RIGHT;
1908 }
1909
1910 static inline int find_data_object_by_boot_id(
1911                 JournalFile *f,
1912                 sd_id128_t boot_id,
1913                 Object **o,
1914                 uint64_t *b) {
1915         char t[sizeof("_BOOT_ID=")-1 + 32 + 1] = "_BOOT_ID=";
1916
1917         sd_id128_to_string(boot_id, t + 9);
1918         return journal_file_find_data_object(f, t, sizeof(t) - 1, o, b);
1919 }
1920
1921 int journal_file_move_to_entry_by_monotonic(
1922                 JournalFile *f,
1923                 sd_id128_t boot_id,
1924                 uint64_t monotonic,
1925                 direction_t direction,
1926                 Object **ret,
1927                 uint64_t *offset) {
1928
1929         Object *o;
1930         int r;
1931
1932         assert(f);
1933
1934         r = find_data_object_by_boot_id(f, boot_id, &o, NULL);
1935         if (r < 0)
1936                 return r;
1937         if (r == 0)
1938                 return -ENOENT;
1939
1940         return generic_array_bisect_plus_one(f,
1941                                              le64toh(o->data.entry_offset),
1942                                              le64toh(o->data.entry_array_offset),
1943                                              le64toh(o->data.n_entries),
1944                                              monotonic,
1945                                              test_object_monotonic,
1946                                              direction,
1947                                              ret, offset, NULL);
1948 }
1949
1950 int journal_file_next_entry(
1951                 JournalFile *f,
1952                 Object *o, uint64_t p,
1953                 direction_t direction,
1954                 Object **ret, uint64_t *offset) {
1955
1956         uint64_t i, n, ofs;
1957         int r;
1958
1959         assert(f);
1960         assert(p > 0 || !o);
1961
1962         n = le64toh(f->header->n_entries);
1963         if (n <= 0)
1964                 return 0;
1965
1966         if (!o)
1967                 i = direction == DIRECTION_DOWN ? 0 : n - 1;
1968         else {
1969                 if (o->object.type != OBJECT_ENTRY)
1970                         return -EINVAL;
1971
1972                 r = generic_array_bisect(f,
1973                                          le64toh(f->header->entry_array_offset),
1974                                          le64toh(f->header->n_entries),
1975                                          p,
1976                                          test_object_offset,
1977                                          DIRECTION_DOWN,
1978                                          NULL, NULL,
1979                                          &i);
1980                 if (r <= 0)
1981                         return r;
1982
1983                 if (direction == DIRECTION_DOWN) {
1984                         if (i >= n - 1)
1985                                 return 0;
1986
1987                         i++;
1988                 } else {
1989                         if (i <= 0)
1990                                 return 0;
1991
1992                         i--;
1993                 }
1994         }
1995
1996         /* And jump to it */
1997         r = generic_array_get(f,
1998                               le64toh(f->header->entry_array_offset),
1999                               i,
2000                               ret, &ofs);
2001         if (r <= 0)
2002                 return r;
2003
2004         if (p > 0 &&
2005             (direction == DIRECTION_DOWN ? ofs <= p : ofs >= p)) {
2006                 log_debug("%s: entry array corrupted at entry %"PRIu64,
2007                           f->path, i);
2008                 return -EBADMSG;
2009         }
2010
2011         if (offset)
2012                 *offset = ofs;
2013
2014         return 1;
2015 }
2016
2017 int journal_file_skip_entry(
2018                 JournalFile *f,
2019                 Object *o, uint64_t p,
2020                 int64_t skip,
2021                 Object **ret, uint64_t *offset) {
2022
2023         uint64_t i, n;
2024         int r;
2025
2026         assert(f);
2027         assert(o);
2028         assert(p > 0);
2029
2030         if (o->object.type != OBJECT_ENTRY)
2031                 return -EINVAL;
2032
2033         r = generic_array_bisect(f,
2034                                  le64toh(f->header->entry_array_offset),
2035                                  le64toh(f->header->n_entries),
2036                                  p,
2037                                  test_object_offset,
2038                                  DIRECTION_DOWN,
2039                                  NULL, NULL,
2040                                  &i);
2041         if (r <= 0)
2042                 return r;
2043
2044         /* Calculate new index */
2045         if (skip < 0) {
2046                 if ((uint64_t) -skip >= i)
2047                         i = 0;
2048                 else
2049                         i = i - (uint64_t) -skip;
2050         } else
2051                 i  += (uint64_t) skip;
2052
2053         n = le64toh(f->header->n_entries);
2054         if (n <= 0)
2055                 return -EBADMSG;
2056
2057         if (i >= n)
2058                 i = n-1;
2059
2060         return generic_array_get(f,
2061                                  le64toh(f->header->entry_array_offset),
2062                                  i,
2063                                  ret, offset);
2064 }
2065
2066 int journal_file_next_entry_for_data(
2067                 JournalFile *f,
2068                 Object *o, uint64_t p,
2069                 uint64_t data_offset,
2070                 direction_t direction,
2071                 Object **ret, uint64_t *offset) {
2072
2073         uint64_t n, i;
2074         int r;
2075         Object *d;
2076
2077         assert(f);
2078         assert(p > 0 || !o);
2079
2080         r = journal_file_move_to_object(f, OBJECT_DATA, data_offset, &d);
2081         if (r < 0)
2082                 return r;
2083
2084         n = le64toh(d->data.n_entries);
2085         if (n <= 0)
2086                 return n;
2087
2088         if (!o)
2089                 i = direction == DIRECTION_DOWN ? 0 : n - 1;
2090         else {
2091                 if (o->object.type != OBJECT_ENTRY)
2092                         return -EINVAL;
2093
2094                 r = generic_array_bisect_plus_one(f,
2095                                                   le64toh(d->data.entry_offset),
2096                                                   le64toh(d->data.entry_array_offset),
2097                                                   le64toh(d->data.n_entries),
2098                                                   p,
2099                                                   test_object_offset,
2100                                                   DIRECTION_DOWN,
2101                                                   NULL, NULL,
2102                                                   &i);
2103
2104                 if (r <= 0)
2105                         return r;
2106
2107                 if (direction == DIRECTION_DOWN) {
2108                         if (i >= n - 1)
2109                                 return 0;
2110
2111                         i++;
2112                 } else {
2113                         if (i <= 0)
2114                                 return 0;
2115
2116                         i--;
2117                 }
2118
2119         }
2120
2121         return generic_array_get_plus_one(f,
2122                                           le64toh(d->data.entry_offset),
2123                                           le64toh(d->data.entry_array_offset),
2124                                           i,
2125                                           ret, offset);
2126 }
2127
2128 int journal_file_move_to_entry_by_offset_for_data(
2129                 JournalFile *f,
2130                 uint64_t data_offset,
2131                 uint64_t p,
2132                 direction_t direction,
2133                 Object **ret, uint64_t *offset) {
2134
2135         int r;
2136         Object *d;
2137
2138         assert(f);
2139
2140         r = journal_file_move_to_object(f, OBJECT_DATA, data_offset, &d);
2141         if (r < 0)
2142                 return r;
2143
2144         return generic_array_bisect_plus_one(f,
2145                                              le64toh(d->data.entry_offset),
2146                                              le64toh(d->data.entry_array_offset),
2147                                              le64toh(d->data.n_entries),
2148                                              p,
2149                                              test_object_offset,
2150                                              direction,
2151                                              ret, offset, NULL);
2152 }
2153
2154 int journal_file_move_to_entry_by_monotonic_for_data(
2155                 JournalFile *f,
2156                 uint64_t data_offset,
2157                 sd_id128_t boot_id,
2158                 uint64_t monotonic,
2159                 direction_t direction,
2160                 Object **ret, uint64_t *offset) {
2161
2162         Object *o, *d;
2163         int r;
2164         uint64_t b, z;
2165
2166         assert(f);
2167
2168         /* First, seek by time */
2169         r = find_data_object_by_boot_id(f, boot_id, &o, &b);
2170         if (r < 0)
2171                 return r;
2172         if (r == 0)
2173                 return -ENOENT;
2174
2175         r = generic_array_bisect_plus_one(f,
2176                                           le64toh(o->data.entry_offset),
2177                                           le64toh(o->data.entry_array_offset),
2178                                           le64toh(o->data.n_entries),
2179                                           monotonic,
2180                                           test_object_monotonic,
2181                                           direction,
2182                                           NULL, &z, NULL);
2183         if (r <= 0)
2184                 return r;
2185
2186         /* And now, continue seeking until we find an entry that
2187          * exists in both bisection arrays */
2188
2189         for (;;) {
2190                 Object *qo;
2191                 uint64_t p, q;
2192
2193                 r = journal_file_move_to_object(f, OBJECT_DATA, data_offset, &d);
2194                 if (r < 0)
2195                         return r;
2196
2197                 r = generic_array_bisect_plus_one(f,
2198                                                   le64toh(d->data.entry_offset),
2199                                                   le64toh(d->data.entry_array_offset),
2200                                                   le64toh(d->data.n_entries),
2201                                                   z,
2202                                                   test_object_offset,
2203                                                   direction,
2204                                                   NULL, &p, NULL);
2205                 if (r <= 0)
2206                         return r;
2207
2208                 r = journal_file_move_to_object(f, OBJECT_DATA, b, &o);
2209                 if (r < 0)
2210                         return r;
2211
2212                 r = generic_array_bisect_plus_one(f,
2213                                                   le64toh(o->data.entry_offset),
2214                                                   le64toh(o->data.entry_array_offset),
2215                                                   le64toh(o->data.n_entries),
2216                                                   p,
2217                                                   test_object_offset,
2218                                                   direction,
2219                                                   &qo, &q, NULL);
2220
2221                 if (r <= 0)
2222                         return r;
2223
2224                 if (p == q) {
2225                         if (ret)
2226                                 *ret = qo;
2227                         if (offset)
2228                                 *offset = q;
2229
2230                         return 1;
2231                 }
2232
2233                 z = q;
2234         }
2235 }
2236
2237 int journal_file_move_to_entry_by_seqnum_for_data(
2238                 JournalFile *f,
2239                 uint64_t data_offset,
2240                 uint64_t seqnum,
2241                 direction_t direction,
2242                 Object **ret, uint64_t *offset) {
2243
2244         Object *d;
2245         int r;
2246
2247         assert(f);
2248
2249         r = journal_file_move_to_object(f, OBJECT_DATA, data_offset, &d);
2250         if (r < 0)
2251                 return r;
2252
2253         return generic_array_bisect_plus_one(f,
2254                                              le64toh(d->data.entry_offset),
2255                                              le64toh(d->data.entry_array_offset),
2256                                              le64toh(d->data.n_entries),
2257                                              seqnum,
2258                                              test_object_seqnum,
2259                                              direction,
2260                                              ret, offset, NULL);
2261 }
2262
2263 int journal_file_move_to_entry_by_realtime_for_data(
2264                 JournalFile *f,
2265                 uint64_t data_offset,
2266                 uint64_t realtime,
2267                 direction_t direction,
2268                 Object **ret, uint64_t *offset) {
2269
2270         Object *d;
2271         int r;
2272
2273         assert(f);
2274
2275         r = journal_file_move_to_object(f, OBJECT_DATA, data_offset, &d);
2276         if (r < 0)
2277                 return r;
2278
2279         return generic_array_bisect_plus_one(f,
2280                                              le64toh(d->data.entry_offset),
2281                                              le64toh(d->data.entry_array_offset),
2282                                              le64toh(d->data.n_entries),
2283                                              realtime,
2284                                              test_object_realtime,
2285                                              direction,
2286                                              ret, offset, NULL);
2287 }
2288
2289 void journal_file_dump(JournalFile *f) {
2290         Object *o;
2291         int r;
2292         uint64_t p;
2293
2294         assert(f);
2295
2296         journal_file_print_header(f);
2297
2298         p = le64toh(f->header->header_size);
2299         while (p != 0) {
2300                 r = journal_file_move_to_object(f, -1, p, &o);
2301                 if (r < 0)
2302                         goto fail;
2303
2304                 switch (o->object.type) {
2305
2306                 case OBJECT_UNUSED:
2307                         printf("Type: OBJECT_UNUSED\n");
2308                         break;
2309
2310                 case OBJECT_DATA:
2311                         printf("Type: OBJECT_DATA\n");
2312                         break;
2313
2314                 case OBJECT_FIELD:
2315                         printf("Type: OBJECT_FIELD\n");
2316                         break;
2317
2318                 case OBJECT_ENTRY:
2319                         printf("Type: OBJECT_ENTRY seqnum=%"PRIu64" monotonic=%"PRIu64" realtime=%"PRIu64"\n",
2320                                le64toh(o->entry.seqnum),
2321                                le64toh(o->entry.monotonic),
2322                                le64toh(o->entry.realtime));
2323                         break;
2324
2325                 case OBJECT_FIELD_HASH_TABLE:
2326                         printf("Type: OBJECT_FIELD_HASH_TABLE\n");
2327                         break;
2328
2329                 case OBJECT_DATA_HASH_TABLE:
2330                         printf("Type: OBJECT_DATA_HASH_TABLE\n");
2331                         break;
2332
2333                 case OBJECT_ENTRY_ARRAY:
2334                         printf("Type: OBJECT_ENTRY_ARRAY\n");
2335                         break;
2336
2337                 case OBJECT_TAG:
2338                         printf("Type: OBJECT_TAG seqnum=%"PRIu64" epoch=%"PRIu64"\n",
2339                                le64toh(o->tag.seqnum),
2340                                le64toh(o->tag.epoch));
2341                         break;
2342
2343                 default:
2344                         printf("Type: unknown (%u)\n", o->object.type);
2345                         break;
2346                 }
2347
2348                 if (o->object.flags & OBJECT_COMPRESSION_MASK)
2349                         printf("Flags: %s\n",
2350                                object_compressed_to_string(o->object.flags & OBJECT_COMPRESSION_MASK));
2351
2352                 if (p == le64toh(f->header->tail_object_offset))
2353                         p = 0;
2354                 else
2355                         p = p + ALIGN64(le64toh(o->object.size));
2356         }
2357
2358         return;
2359 fail:
2360         log_error("File corrupt");
2361 }
2362
2363 static const char* format_timestamp_safe(char *buf, size_t l, usec_t t) {
2364         const char *x;
2365
2366         x = format_timestamp(buf, l, t);
2367         if (x)
2368                 return x;
2369         return " --- ";
2370 }
2371
2372 void journal_file_print_header(JournalFile *f) {
2373         char a[33], b[33], c[33], d[33];
2374         char x[FORMAT_TIMESTAMP_MAX], y[FORMAT_TIMESTAMP_MAX], z[FORMAT_TIMESTAMP_MAX];
2375         struct stat st;
2376         char bytes[FORMAT_BYTES_MAX];
2377
2378         assert(f);
2379
2380         printf("File Path: %s\n"
2381                "File ID: %s\n"
2382                "Machine ID: %s\n"
2383                "Boot ID: %s\n"
2384                "Sequential Number ID: %s\n"
2385                "State: %s\n"
2386                "Compatible Flags:%s%s\n"
2387                "Incompatible Flags:%s%s%s\n"
2388                "Header size: %"PRIu64"\n"
2389                "Arena size: %"PRIu64"\n"
2390                "Data Hash Table Size: %"PRIu64"\n"
2391                "Field Hash Table Size: %"PRIu64"\n"
2392                "Rotate Suggested: %s\n"
2393                "Head Sequential Number: %"PRIu64"\n"
2394                "Tail Sequential Number: %"PRIu64"\n"
2395                "Head Realtime Timestamp: %s\n"
2396                "Tail Realtime Timestamp: %s\n"
2397                "Tail Monotonic Timestamp: %s\n"
2398                "Objects: %"PRIu64"\n"
2399                "Entry Objects: %"PRIu64"\n",
2400                f->path,
2401                sd_id128_to_string(f->header->file_id, a),
2402                sd_id128_to_string(f->header->machine_id, b),
2403                sd_id128_to_string(f->header->boot_id, c),
2404                sd_id128_to_string(f->header->seqnum_id, d),
2405                f->header->state == STATE_OFFLINE ? "OFFLINE" :
2406                f->header->state == STATE_ONLINE ? "ONLINE" :
2407                f->header->state == STATE_ARCHIVED ? "ARCHIVED" : "UNKNOWN",
2408                JOURNAL_HEADER_SEALED(f->header) ? " SEALED" : "",
2409                (le32toh(f->header->compatible_flags) & ~HEADER_COMPATIBLE_ANY) ? " ???" : "",
2410                JOURNAL_HEADER_COMPRESSED_XZ(f->header) ? " COMPRESSED-XZ" : "",
2411                JOURNAL_HEADER_COMPRESSED_LZ4(f->header) ? " COMPRESSED-LZ4" : "",
2412                (le32toh(f->header->incompatible_flags) & ~HEADER_INCOMPATIBLE_ANY) ? " ???" : "",
2413                le64toh(f->header->header_size),
2414                le64toh(f->header->arena_size),
2415                le64toh(f->header->data_hash_table_size) / sizeof(HashItem),
2416                le64toh(f->header->field_hash_table_size) / sizeof(HashItem),
2417                yes_no(journal_file_rotate_suggested(f, 0)),
2418                le64toh(f->header->head_entry_seqnum),
2419                le64toh(f->header->tail_entry_seqnum),
2420                format_timestamp_safe(x, sizeof(x), le64toh(f->header->head_entry_realtime)),
2421                format_timestamp_safe(y, sizeof(y), le64toh(f->header->tail_entry_realtime)),
2422                format_timespan(z, sizeof(z), le64toh(f->header->tail_entry_monotonic), USEC_PER_MSEC),
2423                le64toh(f->header->n_objects),
2424                le64toh(f->header->n_entries));
2425
2426         if (JOURNAL_HEADER_CONTAINS(f->header, n_data))
2427                 printf("Data Objects: %"PRIu64"\n"
2428                        "Data Hash Table Fill: %.1f%%\n",
2429                        le64toh(f->header->n_data),
2430                        100.0 * (double) le64toh(f->header->n_data) / ((double) (le64toh(f->header->data_hash_table_size) / sizeof(HashItem))));
2431
2432         if (JOURNAL_HEADER_CONTAINS(f->header, n_fields))
2433                 printf("Field Objects: %"PRIu64"\n"
2434                        "Field Hash Table Fill: %.1f%%\n",
2435                        le64toh(f->header->n_fields),
2436                        100.0 * (double) le64toh(f->header->n_fields) / ((double) (le64toh(f->header->field_hash_table_size) / sizeof(HashItem))));
2437
2438         if (JOURNAL_HEADER_CONTAINS(f->header, n_tags))
2439                 printf("Tag Objects: %"PRIu64"\n",
2440                        le64toh(f->header->n_tags));
2441         if (JOURNAL_HEADER_CONTAINS(f->header, n_entry_arrays))
2442                 printf("Entry Array Objects: %"PRIu64"\n",
2443                        le64toh(f->header->n_entry_arrays));
2444
2445         if (fstat(f->fd, &st) >= 0)
2446                 printf("Disk usage: %s\n", format_bytes(bytes, sizeof(bytes), (off_t) st.st_blocks * 512ULL));
2447 }
2448
2449 int journal_file_open(
2450                 const char *fname,
2451                 int flags,
2452                 mode_t mode,
2453                 bool compress,
2454                 bool seal,
2455                 JournalMetrics *metrics,
2456                 MMapCache *mmap_cache,
2457                 JournalFile *template,
2458                 JournalFile **ret) {
2459
2460         JournalFile *f;
2461         int r;
2462         bool newly_created = false;
2463
2464         assert(fname);
2465         assert(ret);
2466
2467         if ((flags & O_ACCMODE) != O_RDONLY &&
2468             (flags & O_ACCMODE) != O_RDWR)
2469                 return -EINVAL;
2470
2471         if (!endswith(fname, ".journal") &&
2472             !endswith(fname, ".journal~"))
2473                 return -EINVAL;
2474
2475         f = new0(JournalFile, 1);
2476         if (!f)
2477                 return -ENOMEM;
2478
2479         f->fd = -1;
2480         f->mode = mode;
2481
2482         f->flags = flags;
2483         f->prot = prot_from_flags(flags);
2484         f->writable = (flags & O_ACCMODE) != O_RDONLY;
2485 #if defined(HAVE_LZ4)
2486         f->compress_lz4 = compress;
2487 #elif defined(HAVE_XZ)
2488         f->compress_xz = compress;
2489 #endif
2490 #ifdef HAVE_GCRYPT
2491         f->seal = seal;
2492 #endif
2493
2494         if (mmap_cache)
2495                 f->mmap = mmap_cache_ref(mmap_cache);
2496         else {
2497                 f->mmap = mmap_cache_new();
2498                 if (!f->mmap) {
2499                         r = -ENOMEM;
2500                         goto fail;
2501                 }
2502         }
2503
2504         f->path = strdup(fname);
2505         if (!f->path) {
2506                 r = -ENOMEM;
2507                 goto fail;
2508         }
2509
2510         f->chain_cache = hashmap_new(uint64_hash_func, uint64_compare_func);
2511         if (!f->chain_cache) {
2512                 r = -ENOMEM;
2513                 goto fail;
2514         }
2515
2516         f->fd = open(f->path, f->flags|O_CLOEXEC, f->mode);
2517         if (f->fd < 0) {
2518                 r = -errno;
2519                 goto fail;
2520         }
2521
2522         if (fstat(f->fd, &f->last_stat) < 0) {
2523                 r = -errno;
2524                 goto fail;
2525         }
2526
2527         if (f->last_stat.st_size == 0 && f->writable) {
2528                 uint64_t crtime;
2529
2530                 /* Let's attach the creation time to the journal file,
2531                  * so that the vacuuming code knows the age of this
2532                  * file even if the file might end up corrupted one
2533                  * day... Ideally we'd just use the creation time many
2534                  * file systems maintain for each file, but there is
2535                  * currently no usable API to query this, hence let's
2536                  * emulate this via extended attributes. If extended
2537                  * attributes are not supported we'll just skip this,
2538                  * and rely solely on mtime/atime/ctime of the file.*/
2539
2540                 crtime = htole64((uint64_t) now(CLOCK_REALTIME));
2541                 fsetxattr(f->fd, "user.crtime_usec", &crtime, sizeof(crtime), XATTR_CREATE);
2542
2543 #ifdef HAVE_GCRYPT
2544                 /* Try to load the FSPRG state, and if we can't, then
2545                  * just don't do sealing */
2546                 if (f->seal) {
2547                         r = journal_file_fss_load(f);
2548                         if (r < 0)
2549                                 f->seal = false;
2550                 }
2551 #endif
2552
2553                 r = journal_file_init_header(f, template);
2554                 if (r < 0)
2555                         goto fail;
2556
2557                 if (fstat(f->fd, &f->last_stat) < 0) {
2558                         r = -errno;
2559                         goto fail;
2560                 }
2561
2562                 newly_created = true;
2563         }
2564
2565         if (f->last_stat.st_size < (off_t) HEADER_SIZE_MIN) {
2566                 r = -EIO;
2567                 goto fail;
2568         }
2569
2570         f->header = mmap(NULL, PAGE_ALIGN(sizeof(Header)), prot_from_flags(flags), MAP_SHARED, f->fd, 0);
2571         if (f->header == MAP_FAILED) {
2572                 f->header = NULL;
2573                 r = -errno;
2574                 goto fail;
2575         }
2576
2577         if (!newly_created) {
2578                 r = journal_file_verify_header(f);
2579                 if (r < 0)
2580                         goto fail;
2581         }
2582
2583 #ifdef HAVE_GCRYPT
2584         if (!newly_created && f->writable) {
2585                 r = journal_file_fss_load(f);
2586                 if (r < 0)
2587                         goto fail;
2588         }
2589 #endif
2590
2591         if (f->writable) {
2592                 if (metrics) {
2593                         journal_default_metrics(metrics, f->fd);
2594                         f->metrics = *metrics;
2595                 } else if (template)
2596                         f->metrics = template->metrics;
2597
2598                 r = journal_file_refresh_header(f);
2599                 if (r < 0)
2600                         goto fail;
2601         }
2602
2603 #ifdef HAVE_GCRYPT
2604         r = journal_file_hmac_setup(f);
2605         if (r < 0)
2606                 goto fail;
2607 #endif
2608
2609         if (newly_created) {
2610                 r = journal_file_setup_field_hash_table(f);
2611                 if (r < 0)
2612                         goto fail;
2613
2614                 r = journal_file_setup_data_hash_table(f);
2615                 if (r < 0)
2616                         goto fail;
2617
2618 #ifdef HAVE_GCRYPT
2619                 r = journal_file_append_first_tag(f);
2620                 if (r < 0)
2621                         goto fail;
2622 #endif
2623         }
2624
2625         r = journal_file_map_field_hash_table(f);
2626         if (r < 0)
2627                 goto fail;
2628
2629         r = journal_file_map_data_hash_table(f);
2630         if (r < 0)
2631                 goto fail;
2632
2633         *ret = f;
2634         return 0;
2635
2636 fail:
2637         journal_file_close(f);
2638
2639         return r;
2640 }
2641
2642 int journal_file_rotate(JournalFile **f, bool compress, bool seal) {
2643         _cleanup_free_ char *p = NULL;
2644         size_t l;
2645         JournalFile *old_file, *new_file = NULL;
2646         int r;
2647
2648         assert(f);
2649         assert(*f);
2650
2651         old_file = *f;
2652
2653         if (!old_file->writable)
2654                 return -EINVAL;
2655
2656         if (!endswith(old_file->path, ".journal"))
2657                 return -EINVAL;
2658
2659         l = strlen(old_file->path);
2660         r = asprintf(&p, "%.*s@" SD_ID128_FORMAT_STR "-%016"PRIx64"-%016"PRIx64".journal",
2661                      (int) l - 8, old_file->path,
2662                      SD_ID128_FORMAT_VAL(old_file->header->seqnum_id),
2663                      le64toh((*f)->header->head_entry_seqnum),
2664                      le64toh((*f)->header->head_entry_realtime));
2665         if (r < 0)
2666                 return -ENOMEM;
2667
2668         r = rename(old_file->path, p);
2669         if (r < 0)
2670                 return -errno;
2671
2672         old_file->header->state = STATE_ARCHIVED;
2673
2674         r = journal_file_open(old_file->path, old_file->flags, old_file->mode, compress, seal, NULL, old_file->mmap, old_file, &new_file);
2675         journal_file_close(old_file);
2676
2677         *f = new_file;
2678         return r;
2679 }
2680
2681 int journal_file_open_reliably(
2682                 const char *fname,
2683                 int flags,
2684                 mode_t mode,
2685                 bool compress,
2686                 bool seal,
2687                 JournalMetrics *metrics,
2688                 MMapCache *mmap_cache,
2689                 JournalFile *template,
2690                 JournalFile **ret) {
2691
2692         int r;
2693         size_t l;
2694         _cleanup_free_ char *p = NULL;
2695
2696         r = journal_file_open(fname, flags, mode, compress, seal,
2697                               metrics, mmap_cache, template, ret);
2698         if (r != -EBADMSG && /* corrupted */
2699             r != -ENODATA && /* truncated */
2700             r != -EHOSTDOWN && /* other machine */
2701             r != -EPROTONOSUPPORT && /* incompatible feature */
2702             r != -EBUSY && /* unclean shutdown */
2703             r != -ESHUTDOWN /* already archived */)
2704                 return r;
2705
2706         if ((flags & O_ACCMODE) == O_RDONLY)
2707                 return r;
2708
2709         if (!(flags & O_CREAT))
2710                 return r;
2711
2712         if (!endswith(fname, ".journal"))
2713                 return r;
2714
2715         /* The file is corrupted. Rotate it away and try it again (but only once) */
2716
2717         l = strlen(fname);
2718         if (asprintf(&p, "%.*s@%016llx-%016" PRIx64 ".journal~",
2719                      (int) l - 8, fname,
2720                      (unsigned long long) now(CLOCK_REALTIME),
2721                      random_u64()) < 0)
2722                 return -ENOMEM;
2723
2724         r = rename(fname, p);
2725         if (r < 0)
2726                 return -errno;
2727
2728         log_warning("File %s corrupted or uncleanly shut down, renaming and replacing.", fname);
2729
2730         return journal_file_open(fname, flags, mode, compress, seal,
2731                                  metrics, mmap_cache, template, ret);
2732 }
2733
2734 int journal_file_copy_entry(JournalFile *from, JournalFile *to, Object *o, uint64_t p, uint64_t *seqnum, Object **ret, uint64_t *offset) {
2735         uint64_t i, n;
2736         uint64_t q, xor_hash = 0;
2737         int r;
2738         EntryItem *items;
2739         dual_timestamp ts;
2740
2741         assert(from);
2742         assert(to);
2743         assert(o);
2744         assert(p);
2745
2746         if (!to->writable)
2747                 return -EPERM;
2748
2749         ts.monotonic = le64toh(o->entry.monotonic);
2750         ts.realtime = le64toh(o->entry.realtime);
2751
2752         n = journal_file_entry_n_items(o);
2753         /* alloca() can't take 0, hence let's allocate at least one */
2754         items = alloca(sizeof(EntryItem) * MAX(1u, n));
2755
2756         for (i = 0; i < n; i++) {
2757                 uint64_t l, h;
2758                 le64_t le_hash;
2759                 size_t t;
2760                 void *data;
2761                 Object *u;
2762
2763                 q = le64toh(o->entry.items[i].object_offset);
2764                 le_hash = o->entry.items[i].hash;
2765
2766                 r = journal_file_move_to_object(from, OBJECT_DATA, q, &o);
2767                 if (r < 0)
2768                         return r;
2769
2770                 if (le_hash != o->data.hash)
2771                         return -EBADMSG;
2772
2773                 l = le64toh(o->object.size) - offsetof(Object, data.payload);
2774                 t = (size_t) l;
2775
2776                 /* We hit the limit on 32bit machines */
2777                 if ((uint64_t) t != l)
2778                         return -E2BIG;
2779
2780                 if (o->object.flags & OBJECT_COMPRESSION_MASK) {
2781 #if defined(HAVE_XZ) || defined(HAVE_LZ4)
2782                         size_t rsize;
2783
2784                         r = decompress_blob(o->object.flags & OBJECT_COMPRESSION_MASK,
2785                                             o->data.payload, l, &from->compress_buffer, &from->compress_buffer_size, &rsize, 0);
2786                         if (r < 0)
2787                                 return r;
2788
2789                         data = from->compress_buffer;
2790                         l = rsize;
2791 #else
2792                         return -EPROTONOSUPPORT;
2793 #endif
2794                 } else
2795                         data = o->data.payload;
2796
2797                 r = journal_file_append_data(to, data, l, &u, &h);
2798                 if (r < 0)
2799                         return r;
2800
2801                 xor_hash ^= le64toh(u->data.hash);
2802                 items[i].object_offset = htole64(h);
2803                 items[i].hash = u->data.hash;
2804
2805                 r = journal_file_move_to_object(from, OBJECT_ENTRY, p, &o);
2806                 if (r < 0)
2807                         return r;
2808         }
2809
2810         return journal_file_append_entry_internal(to, &ts, xor_hash, items, n, seqnum, ret, offset);
2811 }
2812
2813 void journal_default_metrics(JournalMetrics *m, int fd) {
2814         uint64_t fs_size = 0;
2815         struct statvfs ss;
2816         char a[FORMAT_BYTES_MAX], b[FORMAT_BYTES_MAX], c[FORMAT_BYTES_MAX], d[FORMAT_BYTES_MAX];
2817
2818         assert(m);
2819         assert(fd >= 0);
2820
2821         if (fstatvfs(fd, &ss) >= 0)
2822                 fs_size = ss.f_frsize * ss.f_blocks;
2823
2824         if (m->max_use == (uint64_t) -1) {
2825
2826                 if (fs_size > 0) {
2827                         m->max_use = PAGE_ALIGN(fs_size / 10); /* 10% of file system size */
2828
2829                         if (m->max_use > DEFAULT_MAX_USE_UPPER)
2830                                 m->max_use = DEFAULT_MAX_USE_UPPER;
2831
2832                         if (m->max_use < DEFAULT_MAX_USE_LOWER)
2833                                 m->max_use = DEFAULT_MAX_USE_LOWER;
2834                 } else
2835                         m->max_use = DEFAULT_MAX_USE_LOWER;
2836         } else {
2837                 m->max_use = PAGE_ALIGN(m->max_use);
2838
2839                 if (m->max_use < JOURNAL_FILE_SIZE_MIN*2)
2840                         m->max_use = JOURNAL_FILE_SIZE_MIN*2;
2841         }
2842
2843         if (m->max_size == (uint64_t) -1) {
2844                 m->max_size = PAGE_ALIGN(m->max_use / 8); /* 8 chunks */
2845
2846                 if (m->max_size > DEFAULT_MAX_SIZE_UPPER)
2847                         m->max_size = DEFAULT_MAX_SIZE_UPPER;
2848         } else
2849                 m->max_size = PAGE_ALIGN(m->max_size);
2850
2851         if (m->max_size < JOURNAL_FILE_SIZE_MIN)
2852                 m->max_size = JOURNAL_FILE_SIZE_MIN;
2853
2854         if (m->max_size*2 > m->max_use)
2855                 m->max_use = m->max_size*2;
2856
2857         if (m->min_size == (uint64_t) -1)
2858                 m->min_size = JOURNAL_FILE_SIZE_MIN;
2859         else {
2860                 m->min_size = PAGE_ALIGN(m->min_size);
2861
2862                 if (m->min_size < JOURNAL_FILE_SIZE_MIN)
2863                         m->min_size = JOURNAL_FILE_SIZE_MIN;
2864
2865                 if (m->min_size > m->max_size)
2866                         m->max_size = m->min_size;
2867         }
2868
2869         if (m->keep_free == (uint64_t) -1) {
2870
2871                 if (fs_size > 0) {
2872                         m->keep_free = PAGE_ALIGN(fs_size * 3 / 20); /* 15% of file system size */
2873
2874                         if (m->keep_free > DEFAULT_KEEP_FREE_UPPER)
2875                                 m->keep_free = DEFAULT_KEEP_FREE_UPPER;
2876
2877                 } else
2878                         m->keep_free = DEFAULT_KEEP_FREE;
2879         }
2880
2881         log_debug("Fixed max_use=%s max_size=%s min_size=%s keep_free=%s",
2882                   format_bytes(a, sizeof(a), m->max_use),
2883                   format_bytes(b, sizeof(b), m->max_size),
2884                   format_bytes(c, sizeof(c), m->min_size),
2885                   format_bytes(d, sizeof(d), m->keep_free));
2886 }
2887
2888 int journal_file_get_cutoff_realtime_usec(JournalFile *f, usec_t *from, usec_t *to) {
2889         assert(f);
2890         assert(from || to);
2891
2892         if (from) {
2893                 if (f->header->head_entry_realtime == 0)
2894                         return -ENOENT;
2895
2896                 *from = le64toh(f->header->head_entry_realtime);
2897         }
2898
2899         if (to) {
2900                 if (f->header->tail_entry_realtime == 0)
2901                         return -ENOENT;
2902
2903                 *to = le64toh(f->header->tail_entry_realtime);
2904         }
2905
2906         return 1;
2907 }
2908
2909 int journal_file_get_cutoff_monotonic_usec(JournalFile *f, sd_id128_t boot_id, usec_t *from, usec_t *to) {
2910         Object *o;
2911         uint64_t p;
2912         int r;
2913
2914         assert(f);
2915         assert(from || to);
2916
2917         r = find_data_object_by_boot_id(f, boot_id, &o, &p);
2918         if (r <= 0)
2919                 return r;
2920
2921         if (le64toh(o->data.n_entries) <= 0)
2922                 return 0;
2923
2924         if (from) {
2925                 r = journal_file_move_to_object(f, OBJECT_ENTRY, le64toh(o->data.entry_offset), &o);
2926                 if (r < 0)
2927                         return r;
2928
2929                 *from = le64toh(o->entry.monotonic);
2930         }
2931
2932         if (to) {
2933                 r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
2934                 if (r < 0)
2935                         return r;
2936
2937                 r = generic_array_get_plus_one(f,
2938                                                le64toh(o->data.entry_offset),
2939                                                le64toh(o->data.entry_array_offset),
2940                                                le64toh(o->data.n_entries)-1,
2941                                                &o, NULL);
2942                 if (r <= 0)
2943                         return r;
2944
2945                 *to = le64toh(o->entry.monotonic);
2946         }
2947
2948         return 1;
2949 }
2950
2951 bool journal_file_rotate_suggested(JournalFile *f, usec_t max_file_usec) {
2952         assert(f);
2953
2954         /* If we gained new header fields we gained new features,
2955          * hence suggest a rotation */
2956         if (le64toh(f->header->header_size) < sizeof(Header)) {
2957                 log_debug("%s uses an outdated header, suggesting rotation.", f->path);
2958                 return true;
2959         }
2960
2961         /* Let's check if the hash tables grew over a certain fill
2962          * level (75%, borrowing this value from Java's hash table
2963          * implementation), and if so suggest a rotation. To calculate
2964          * the fill level we need the n_data field, which only exists
2965          * in newer versions. */
2966
2967         if (JOURNAL_HEADER_CONTAINS(f->header, n_data))
2968                 if (le64toh(f->header->n_data) * 4ULL > (le64toh(f->header->data_hash_table_size) / sizeof(HashItem)) * 3ULL) {
2969                         log_debug("Data hash table of %s has a fill level at %.1f (%"PRIu64" of %"PRIu64" items, %llu file size, %"PRIu64" bytes per hash table item), suggesting rotation.",
2970                                   f->path,
2971                                   100.0 * (double) le64toh(f->header->n_data) / ((double) (le64toh(f->header->data_hash_table_size) / sizeof(HashItem))),
2972                                   le64toh(f->header->n_data),
2973                                   le64toh(f->header->data_hash_table_size) / sizeof(HashItem),
2974                                   (unsigned long long) f->last_stat.st_size,
2975                                   f->last_stat.st_size / le64toh(f->header->n_data));
2976                         return true;
2977                 }
2978
2979         if (JOURNAL_HEADER_CONTAINS(f->header, n_fields))
2980                 if (le64toh(f->header->n_fields) * 4ULL > (le64toh(f->header->field_hash_table_size) / sizeof(HashItem)) * 3ULL) {
2981                         log_debug("Field hash table of %s has a fill level at %.1f (%"PRIu64" of %"PRIu64" items), suggesting rotation.",
2982                                   f->path,
2983                                   100.0 * (double) le64toh(f->header->n_fields) / ((double) (le64toh(f->header->field_hash_table_size) / sizeof(HashItem))),
2984                                   le64toh(f->header->n_fields),
2985                                   le64toh(f->header->field_hash_table_size) / sizeof(HashItem));
2986                         return true;
2987                 }
2988
2989         /* Are the data objects properly indexed by field objects? */
2990         if (JOURNAL_HEADER_CONTAINS(f->header, n_data) &&
2991             JOURNAL_HEADER_CONTAINS(f->header, n_fields) &&
2992             le64toh(f->header->n_data) > 0 &&
2993             le64toh(f->header->n_fields) == 0)
2994                 return true;
2995
2996         if (max_file_usec > 0) {
2997                 usec_t t, h;
2998
2999                 h = le64toh(f->header->head_entry_realtime);
3000                 t = now(CLOCK_REALTIME);
3001
3002                 if (h > 0 && t > h + max_file_usec)
3003                         return true;
3004         }
3005
3006         return false;
3007 }