journal: split up journal-file.c
author Lennart Poettering <lennart@poettering.net>
Wed, 15 Aug 2012 23:51:54 +0000 (01:51 +0200)
committer Lennart Poettering <lennart@poettering.net>
Thu, 16 Aug 2012 15:10:57 +0000 (17:10 +0200)
14 files changed:
.gitignore
Makefile.am
src/journal/journal-authenticate.c [new file with mode: 0644]
src/journal/journal-authenticate.h [new file with mode: 0644]
src/journal/journal-file.c
src/journal/journal-file.h
src/journal/journal-vacuum.c [new file with mode: 0644]
src/journal/journal-vacuum.h [new file with mode: 0644]
src/journal/journal-verify.c [new file with mode: 0644]
src/journal/journal-verify.h [new file with mode: 0644]
src/journal/journalctl.c
src/journal/journald.c
src/journal/test-journal-verify.c [new file with mode: 0644]
src/journal/test-journal.c

index 8928071..4c8bba8 100644 (file)
@@ -1,3 +1,4 @@
+/test-journal-verify
 /test-journal-match
 /test-journal-stream
 /test-unit-name
index f220b59..895dcfa 100644 (file)
@@ -2397,6 +2397,12 @@ libsystemd_journal_la_SOURCES = \
        src/systemd/sd-journal.h \
        src/journal/journal-file.c \
        src/journal/journal-file.h \
+       src/journal/journal-vacuum.c \
+       src/journal/journal-vacuum.h \
+       src/journal/journal-verify.c \
+       src/journal/journal-verify.h \
+       src/journal/journal-authenticate.c \
+       src/journal/journal-authenticate.h \
        src/journal/lookup3.c \
        src/journal/lookup3.h \
        src/journal/journal-send.c \
diff --git a/src/journal/journal-authenticate.c b/src/journal/journal-authenticate.c
new file mode 100644 (file)
index 0000000..827e4e4
--- /dev/null
@@ -0,0 +1,434 @@
+/*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
+
+/***
+  This file is part of systemd.
+
+  Copyright 2012 Lennart Poettering
+
+  systemd is free software; you can redistribute it and/or modify it
+  under the terms of the GNU Lesser General Public License as published by
+  the Free Software Foundation; either version 2.1 of the License, or
+  (at your option) any later version.
+
+  systemd is distributed in the hope that it will be useful, but
+  WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+  Lesser General Public License for more details.
+
+  You should have received a copy of the GNU Lesser General Public License
+  along with systemd; If not, see <http://www.gnu.org/licenses/>.
+***/
+
+#include <fcntl.h>
+#include <sys/mman.h>
+
+#include "journal-def.h"
+#include "journal-file.h"
+#include "journal-authenticate.h"
+#include "fsprg.h"
+
+/* Returns a pointer to the FSPRG state that follows the FSPRG header
+ * inside the area f->fsprg_header points to. Returns NULL if
+ * authentication is disabled for this file, or if the header and
+ * state sizes recorded in the header do not fit into f->fsprg_size. */
+static void *fsprg_state(JournalFile *f) {
+        uint64_t a, b;
+        assert(f);
+
+        if (!f->authenticate)
+                return NULL;
+
+        a = le64toh(f->fsprg_header->header_size);
+        b = le64toh(f->fsprg_header->state_size);
+
+        /* Do not compute a + b here: for bogus sizes the sum can wrap
+         * around and slip past the bounds check. */
+        if (a > f->fsprg_size || b > f->fsprg_size - a)
+                return NULL;
+
+        return (uint8_t*) f->fsprg_header + a;
+}
+
+/* Increments the n_tags counter stored in the journal header and
+ * returns the new count in host byte order. */
+static uint64_t journal_file_tag_seqnum(JournalFile *f) {
+        uint64_t next;
+
+        assert(f);
+
+        next = le64toh(f->header->n_tags);
+        next++;
+
+        f->header->n_tags = htole64(next);
+        return next;
+}
+
+/* Appends a tag object that carries the HMAC accumulated so far and
+ * marks the HMAC as no longer running.
+ *
+ * Returns 0 on success, and also when authentication is disabled or
+ * no HMAC is currently running (nothing to seal). Returns a negative
+ * error code if appending or hashing the tag object fails. */
+int journal_file_append_tag(JournalFile *f) {
+        Object *o;
+        uint64_t p;
+        int r;
+
+        assert(f);
+
+        if (!f->authenticate)
+                return 0;
+
+        if (!f->hmac_running)
+                return 0;
+
+        /* No trailing newline here, in line with the other log calls
+         * in this file. */
+        log_debug("Writing tag for epoch %llu", (unsigned long long) FSPRG_GetEpoch(fsprg_state(f)));
+
+        assert(f->hmac);
+
+        r = journal_file_append_object(f, OBJECT_TAG, sizeof(struct TagObject), &o, &p);
+        if (r < 0)
+                return r;
+
+        o->tag.seqnum = htole64(journal_file_tag_seqnum(f));
+
+        /* Add the tag object itself, so that we can protect its
+         * header. This will exclude the actual hash value in it */
+        r = journal_file_hmac_put_object(f, OBJECT_TAG, p);
+        if (r < 0)
+                return r;
+
+        /* Get the HMAC tag and store it in the object */
+        memcpy(o->tag.tag, gcry_md_read(f->hmac, 0), TAG_LENGTH);
+        f->hmac_running = false;
+
+        return 0;
+}
+
+/* Starts a new HMAC cycle unless one is already running: resets the
+ * digest context, keys it with 256 bit obtained from the FSPRG state
+ * and flags the HMAC as running. Always returns 0. */
+static int journal_file_hmac_start(JournalFile *f) {
+        /* 256 bit of key material are handed from FSPRG to the HMAC */
+        uint8_t key[256 / 8];
+
+        assert(f);
+
+        /* Nothing to do without authentication, or mid-cycle */
+        if (!f->authenticate || f->hmac_running)
+                return 0;
+
+        gcry_md_reset(f->hmac);
+
+        FSPRG_GetKey(fsprg_state(f), key, sizeof(key), 0);
+        gcry_md_setkey(f->hmac, key, sizeof(key));
+
+        f->hmac_running = true;
+        return 0;
+}
+
+/* Computes the epoch number that the timestamp "realtime" falls into,
+ * based on the start time and interval stored in the FSPRG header.
+ *
+ * Returns 0 and stores the result in *epoch on success, -ENOTSUP if
+ * start time or interval is zero, and -ESTALE if realtime lies before
+ * the start time. */
+static int journal_file_get_epoch(JournalFile *f, uint64_t realtime, uint64_t *epoch) {
+        uint64_t start, interval;
+
+        assert(f);
+        assert(epoch);
+        assert(f->authenticate);
+
+        start = le64toh(f->fsprg_header->fsprg_start_usec);
+        interval = le64toh(f->fsprg_header->fsprg_interval_usec);
+
+        if (start == 0 || interval == 0)
+                return -ENOTSUP;
+
+        if (realtime < start)
+                return -ESTALE;
+
+        *epoch = (realtime - start) / interval;
+        return 0;
+}
+
+/* Checks whether the FSPRG state must be evolved to reach the epoch
+ * that "realtime" belongs to.
+ *
+ * Returns 1 if the current epoch differs from the target epoch, 0 if
+ * they match or authentication is disabled, -ESTALE if the current
+ * epoch is already past the target, or the error returned by
+ * journal_file_get_epoch(). */
+static int journal_file_need_evolve(JournalFile *f, uint64_t realtime) {
+        uint64_t target, current;
+        int k;
+
+        assert(f);
+
+        if (!f->authenticate)
+                return 0;
+
+        k = journal_file_get_epoch(f, realtime, &target);
+        if (k < 0)
+                return k;
+
+        current = FSPRG_GetEpoch(fsprg_state(f));
+
+        if (current > target)
+                return -ESTALE;
+        if (current == target)
+                return 0;
+
+        return 1;
+}
+
+/* Evolves the FSPRG state step by step until its epoch equals the
+ * epoch that "realtime" belongs to.
+ *
+ * Returns 0 once the target epoch is reached (or if authentication is
+ * disabled), -ESTALE if the state's epoch is beyond the target, or the
+ * error returned by journal_file_get_epoch(). */
+static int journal_file_evolve(JournalFile *f, uint64_t realtime) {
+        uint64_t target, current;
+        int k;
+
+        assert(f);
+
+        if (!f->authenticate)
+                return 0;
+
+        k = journal_file_get_epoch(f, realtime, &target);
+        if (k < 0)
+                return k;
+
+        current = FSPRG_GetEpoch(fsprg_state(f));
+        if (current < target)
+                log_debug("Evolving FSPRG key from epoch %llu to %llu.", (unsigned long long) current, (unsigned long long) target);
+
+        /* One evolution step per iteration, re-reading the epoch each time */
+        while (current < target) {
+                FSPRG_Evolve(fsprg_state(f));
+                current = FSPRG_GetEpoch(fsprg_state(f));
+        }
+
+        return current > target ? -ESTALE : 0;
+}
+
+/* If the FSPRG state needs to be evolved for "realtime": seals the
+ * current HMAC cycle with a tag, evolves the key and starts the next
+ * HMAC cycle.
+ *
+ * Returns 0 if nothing had to be done or everything succeeded, and a
+ * negative error code if appending the tag, evolving or restarting
+ * the HMAC failed. */
+int journal_file_maybe_append_tag(JournalFile *f, uint64_t realtime) {
+        int k;
+
+        assert(f);
+
+        if (!f->authenticate)
+                return 0;
+
+        /* An error from the check is handled the same way as "no
+         * evolution needed": we return 0 without doing anything. */
+        if (journal_file_need_evolve(f, realtime) <= 0)
+                return 0;
+
+        k = journal_file_append_tag(f);
+        if (k >= 0)
+                k = journal_file_evolve(f, realtime);
+        if (k >= 0)
+                k = journal_file_hmac_start(f);
+
+        return k < 0 ? k : 0;
+}
+
+/* Feeds the object of the given type at file offset p into the
+ * running HMAC, starting a new HMAC cycle first if none is running.
+ * Which parts of the object are hashed depends on its type; see the
+ * switch below.
+ *
+ * Returns 0 on success or if authentication is disabled, the error
+ * from journal_file_hmac_start()/journal_file_move_to_object(), or
+ * -EINVAL for an object type not handled here. */
+int journal_file_hmac_put_object(JournalFile *f, int type, uint64_t p) {
+        int r;
+        Object *o;
+
+        assert(f);
+
+        if (!f->authenticate)
+                return 0;
+
+        r = journal_file_hmac_start(f);
+        if (r < 0)
+                return r;
+
+        r = journal_file_move_to_object(f, type, p, &o);
+        if (r < 0)
+                return r;
+
+        /* The generic object header, up to its payload member, is
+         * hashed for every object type */
+        gcry_md_write(f->hmac, o, offsetof(ObjectHeader, payload));
+
+        switch (o->object.type) {
+
+        case OBJECT_DATA:
+                /* Only hash and payload are hashed; all other
+                 * fields of a data object are mutable */
+                gcry_md_write(f->hmac, &o->data.hash, sizeof(o->data.hash));
+                gcry_md_write(f->hmac, o->data.payload, le64toh(o->object.size) - offsetof(DataObject, payload));
+                break;
+
+        case OBJECT_ENTRY:
+                /* Everything from seqnum to the end of the object */
+                gcry_md_write(f->hmac, &o->entry.seqnum, le64toh(o->object.size) - offsetof(EntryObject, seqnum));
+                break;
+
+        case OBJECT_FIELD_HASH_TABLE:
+        case OBJECT_DATA_HASH_TABLE:
+        case OBJECT_ENTRY_ARRAY:
+                /* Nothing beyond the object header: everything else
+                 * is mutable */
+                break;
+
+        case OBJECT_TAG:
+                /* Only the seqnum; the tag value itself is left out */
+                gcry_md_write(f->hmac, &o->tag.seqnum, sizeof(o->tag.seqnum));
+                break;
+        default:
+                return -EINVAL;
+        }
+
+        return 0;
+}
+
+/* Feeds selected ranges of the journal file header into the running
+ * HMAC, starting a new HMAC cycle first if none is running.
+ *
+ * Returns 0 on success or if authentication is disabled, otherwise
+ * the error from journal_file_hmac_start(). */
+int journal_file_hmac_put_header(JournalFile *f) {
+        int r;
+
+        assert(f);
+
+        if (!f->authenticate)
+                return 0;
+
+        r = journal_file_hmac_start(f);
+        if (r < 0)
+                return r;
+
+        /* All but state+reserved, boot_id, arena_size,
+         * tail_object_offset, n_objects, n_entries, tail_seqnum,
+         * head_entry_realtime, tail_entry_realtime,
+         * tail_entry_monotonic, n_data, n_fields, header_tag */
+
+        /* Each call below hashes the bytes from the named field up to,
+         * but not including, the field given in the first offsetof() */
+        gcry_md_write(f->hmac, f->header->signature, offsetof(Header, state) - offsetof(Header, signature));
+        gcry_md_write(f->hmac, &f->header->file_id, offsetof(Header, boot_id) - offsetof(Header, file_id));
+        gcry_md_write(f->hmac, &f->header->seqnum_id, offsetof(Header, arena_size) - offsetof(Header, seqnum_id));
+        gcry_md_write(f->hmac, &f->header->data_hash_table_offset, offsetof(Header, tail_object_offset) - offsetof(Header, data_hash_table_offset));
+        gcry_md_write(f->hmac, &f->header->head_entry_seqnum, offsetof(Header, head_entry_realtime) - offsetof(Header, head_entry_seqnum));
+
+        return 0;
+}
+
+/* Opens, validates and maps the FSPRG file of the local machine
+ * (/var/log/journal/<machine id>/fsprg).
+ *
+ * On success f->fsprg_header points to a shared read/write mapping of
+ * header plus state, and f->fsprg_size holds their combined size.
+ *
+ * Returns 0 on success or if authentication is disabled, otherwise a
+ * negative error code: -ENOMEM, the errno of a failed system call,
+ * -ENODATA if the file is too short, -EBADMSG for an invalid header,
+ * -EPROTONOSUPPORT for unknown incompatible flags, -EHOSTDOWN if the
+ * file belongs to a different machine. */
+int journal_file_load_fsprg(JournalFile *f) {
+        int r, fd = -1;
+        char *p = NULL;
+        struct stat st;
+        FSPRGHeader *m = NULL;
+        sd_id128_t machine;
+        uint64_t header_size, state_size;
+
+        assert(f);
+
+        if (!f->authenticate)
+                return 0;
+
+        r = sd_id128_get_machine(&machine);
+        if (r < 0)
+                return r;
+
+        if (asprintf(&p, "/var/log/journal/" SD_ID128_FORMAT_STR "/fsprg",
+                     SD_ID128_FORMAT_VAL(machine)) < 0)
+                return -ENOMEM;
+
+        fd = open(p, O_RDWR|O_CLOEXEC|O_NOCTTY, 0600);
+        if (fd < 0) {
+                /* Save errno before logging, since the logging call
+                 * may overwrite it */
+                r = -errno;
+                log_error("Failed to open %s: %m", p);
+                goto finish;
+        }
+
+        if (fstat(fd, &st) < 0) {
+                r = -errno;
+                goto finish;
+        }
+
+        if (st.st_size < (off_t) sizeof(FSPRGHeader)) {
+                r = -ENODATA;
+                goto finish;
+        }
+
+        /* Map only the fixed-size header read-only first, so it can
+         * be validated before the full area is mapped read/write */
+        m = mmap(NULL, PAGE_ALIGN(sizeof(FSPRGHeader)), PROT_READ, MAP_SHARED, fd, 0);
+        if (m == MAP_FAILED) {
+                m = NULL;
+                r = -errno;
+                goto finish;
+        }
+
+        if (memcmp(m->signature, FSPRG_HEADER_SIGNATURE, 8) != 0) {
+                r = -EBADMSG;
+                goto finish;
+        }
+
+        if (m->incompatible_flags != 0) {
+                r = -EPROTONOSUPPORT;
+                goto finish;
+        }
+
+        header_size = le64toh(m->header_size);
+        state_size = le64toh(m->state_size);
+
+        if (header_size < sizeof(FSPRGHeader)) {
+                r = -EBADMSG;
+                goto finish;
+        }
+
+        if (state_size != FSPRG_stateinbytes(m->secpar)) {
+                r = -EBADMSG;
+                goto finish;
+        }
+
+        /* Refuse sizes whose sum wraps around: the wrapped value
+         * would pass the file size check below */
+        if (header_size + state_size < header_size) {
+                r = -EBADMSG;
+                goto finish;
+        }
+
+        f->fsprg_size = header_size + state_size;
+        if ((uint64_t) st.st_size < f->fsprg_size) {
+                r = -ENODATA;
+                goto finish;
+        }
+
+        if (!sd_id128_equal(machine, m->machine_id)) {
+                r = -EHOSTDOWN;
+                goto finish;
+        }
+
+        /* Both values are unsigned, hence compare against zero */
+        if (le64toh(m->fsprg_start_usec) == 0 ||
+            le64toh(m->fsprg_interval_usec) == 0) {
+                r = -EBADMSG;
+                goto finish;
+        }
+
+        f->fsprg_header = mmap(NULL, PAGE_ALIGN(f->fsprg_size), PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
+        if (f->fsprg_header == MAP_FAILED) {
+                f->fsprg_header = NULL;
+                r = -errno;
+                goto finish;
+        }
+
+        r = 0;
+
+finish:
+        if (m)
+                munmap(m, PAGE_ALIGN(sizeof(FSPRGHeader)));
+
+        if (fd >= 0)
+                close_nointr_nofail(fd);
+
+        free(p);
+        return r;
+}
+
+/* Allocates the SHA256 HMAC digest context in f->hmac.
+ *
+ * Returns 0 on success or if authentication is disabled, -ENOTSUP if
+ * the digest context could not be opened. */
+int journal_file_setup_hmac(JournalFile *f) {
+        gcry_error_t e;
+
+        /* Same precondition check as the other functions in this
+         * file, since f is dereferenced right away */
+        assert(f);
+
+        if (!f->authenticate)
+                return 0;
+
+        e = gcry_md_open(&f->hmac, GCRY_MD_SHA256, GCRY_MD_FLAG_HMAC);
+        if (e != 0)
+                return -ENOTSUP;
+
+        return 0;
+}
+
+/* Writes the first tag of a file: feeds the file header, the field
+ * hash table object and the data hash table object into the HMAC and
+ * then appends a tag object.
+ *
+ * Returns 0 on success or if authentication is disabled, -EINVAL if a
+ * hash table offset in the header is smaller than the offset of the
+ * items within a hash table object, or the error of a failed step. */
+int journal_file_append_first_tag(JournalFile *f) {
+        int r;
+        uint64_t p;
+
+        /* Same precondition check as the other functions in this
+         * file, since f is dereferenced right away */
+        assert(f);
+
+        if (!f->authenticate)
+                return 0;
+
+        log_debug("Calculating first tag...");
+
+        r = journal_file_hmac_put_header(f);
+        if (r < 0)
+                return r;
+
+        /* The header stores the offset of the hash table items; step
+         * back to the start of the enclosing object */
+        p = le64toh(f->header->field_hash_table_offset);
+        if (p < offsetof(Object, hash_table.items))
+                return -EINVAL;
+        p -= offsetof(Object, hash_table.items);
+
+        r = journal_file_hmac_put_object(f, OBJECT_FIELD_HASH_TABLE, p);
+        if (r < 0)
+                return r;
+
+        p = le64toh(f->header->data_hash_table_offset);
+        if (p < offsetof(Object, hash_table.items))
+                return -EINVAL;
+        p -= offsetof(Object, hash_table.items);
+
+        r = journal_file_hmac_put_object(f, OBJECT_DATA_HASH_TABLE, p);
+        if (r < 0)
+                return r;
+
+        r = journal_file_append_tag(f);
+        if (r < 0)
+                return r;
+
+        return 0;
+}
diff --git a/src/journal/journal-authenticate.h b/src/journal/journal-authenticate.h
new file mode 100644 (file)
index 0000000..c991b22
--- /dev/null
@@ -0,0 +1,35 @@
+/*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
+
+#pragma once
+
+/***
+  This file is part of systemd.
+
+  Copyright 2012 Lennart Poettering
+
+  systemd is free software; you can redistribute it and/or modify it
+  under the terms of the GNU Lesser General Public License as published by
+  the Free Software Foundation; either version 2.1 of the License, or
+  (at your option) any later version.
+
+  systemd is distributed in the hope that it will be useful, but
+  WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+  Lesser General Public License for more details.
+
+  You should have received a copy of the GNU Lesser General Public License
+  along with systemd; If not, see <http://www.gnu.org/licenses/>.
+***/
+
+#include "journal-file.h"
+
+int journal_file_append_tag(JournalFile *f);
+int journal_file_maybe_append_tag(JournalFile *f, uint64_t realtime);
+int journal_file_append_first_tag(JournalFile *f);
+
+int journal_file_hmac_put_header(JournalFile *f);
+int journal_file_hmac_put_object(JournalFile *f, int type, uint64_t p);
+
+int journal_file_load_fsprg(JournalFile *f);
+
+int journal_file_setup_hmac(JournalFile *f);
index 7beedb4..ff439f2 100644 (file)
@@ -29,6 +29,7 @@
 
 #include "journal-def.h"
 #include "journal-file.h"
+#include "journal-authenticate.h"
 #include "lookup3.h"
 #include "compress.h"
 #include "fsprg.h"
 /* n_data was the first entry we added after the initial file format design */
 #define HEADER_SIZE_MIN ALIGN64(offsetof(Header, n_data))
 
-#define ALIGN64(x) (((x) + 7ULL) & ~7ULL)
-
-#define JOURNAL_HEADER_CONTAINS(h, field) \
-        (le64toh((h)->header_size) >= offsetof(Header, field) + sizeof((h)->field))
-
-static int journal_file_maybe_append_tag(JournalFile *f, uint64_t realtime);
-static int journal_file_hmac_put_object(JournalFile *f, int type, uint64_t p);
-
 void journal_file_close(JournalFile *f) {
         assert(f);
 
@@ -434,7 +427,7 @@ static uint64_t journal_file_entry_seqnum(JournalFile *f, uint64_t *seqnum) {
         return r;
 }
 
-static int journal_file_append_object(JournalFile *f, int type, uint64_t size, Object **ret, uint64_t *offset) {
+int journal_file_append_object(JournalFile *f, int type, uint64_t size, Object **ret, uint64_t *offset) {
         int r;
         uint64_t p;
         Object *tail, *o;
@@ -796,7 +789,7 @@ uint64_t journal_file_entry_n_items(Object *o) {
         return (le64toh(o->object.size) - offsetof(Object, entry.items)) / sizeof(EntryItem);
 }
 
-static uint64_t journal_file_entry_array_n_items(Object *o) {
+uint64_t journal_file_entry_array_n_items(Object *o) {
         assert(o);
         assert(o->object.type == OBJECT_ENTRY_ARRAY);
 
@@ -1823,1174 +1816,241 @@ int journal_file_move_to_entry_by_realtime_for_data(
                                              ret, offset, NULL);
 }
 
-static void *fsprg_state(JournalFile *f) {
-        uint64_t a, b;
-        assert(f);
-
-        if (!f->authenticate)
-                return NULL;
-
-        a = le64toh(f->fsprg_header->header_size);
-        b = le64toh(f->fsprg_header->state_size);
-
-        if (a + b > f->fsprg_size)
-                return NULL;
-
-        return (uint8_t*) f->fsprg_header + a;
-}
-
-static uint64_t journal_file_tag_seqnum(JournalFile *f) {
-        uint64_t r;
-
-        assert(f);
-
-        r = le64toh(f->header->n_tags) + 1;
-        f->header->n_tags = htole64(r);
-
-        return r;
-}
-
-int journal_file_append_tag(JournalFile *f) {
+void journal_file_dump(JournalFile *f) {
         Object *o;
-        uint64_t p;
         int r;
+        uint64_t p;
 
         assert(f);
 
-        if (!f->authenticate)
-                return 0;
-
-        if (!f->hmac_running)
-                return 0;
-
-        log_debug("Writing tag for epoch %llu\n", (unsigned long long) FSPRG_GetEpoch(fsprg_state(f)));
-
-        assert(f->hmac);
+        journal_file_print_header(f);
 
-        r = journal_file_append_object(f, OBJECT_TAG, sizeof(struct TagObject), &o, &p);
-        if (r < 0)
-                return r;
+        p = le64toh(f->header->header_size);
+        while (p != 0) {
+                r = journal_file_move_to_object(f, -1, p, &o);
+                if (r < 0)
+                        goto fail;
 
-        o->tag.seqnum = htole64(journal_file_tag_seqnum(f));
+                switch (o->object.type) {
 
-        /* Add the tag object itself, so that we can protect its
-         * header. This will exclude the actual hash value in it */
-        r = journal_file_hmac_put_object(f, OBJECT_TAG, p);
-        if (r < 0)
-                return r;
+                case OBJECT_UNUSED:
+                        printf("Type: OBJECT_UNUSED\n");
+                        break;
 
-        /* Get the HMAC tag and store it in the object */
-        memcpy(o->tag.tag, gcry_md_read(f->hmac, 0), TAG_LENGTH);
-        f->hmac_running = false;
+                case OBJECT_DATA:
+                        printf("Type: OBJECT_DATA\n");
+                        break;
 
-        return 0;
-}
+                case OBJECT_ENTRY:
+                        printf("Type: OBJECT_ENTRY %llu %llu %llu\n",
+                               (unsigned long long) le64toh(o->entry.seqnum),
+                               (unsigned long long) le64toh(o->entry.monotonic),
+                               (unsigned long long) le64toh(o->entry.realtime));
+                        break;
 
-static int journal_file_hmac_start(JournalFile *f) {
-        uint8_t key[256 / 8]; /* Let's pass 256 bit from FSPRG to HMAC */
+                case OBJECT_FIELD_HASH_TABLE:
+                        printf("Type: OBJECT_FIELD_HASH_TABLE\n");
+                        break;
 
-        assert(f);
+                case OBJECT_DATA_HASH_TABLE:
+                        printf("Type: OBJECT_DATA_HASH_TABLE\n");
+                        break;
 
-        if (!f->authenticate)
-                return 0;
+                case OBJECT_ENTRY_ARRAY:
+                        printf("Type: OBJECT_ENTRY_ARRAY\n");
+                        break;
 
-        if (f->hmac_running)
-                return 0;
+                case OBJECT_TAG:
+                        printf("Type: OBJECT_TAG %llu\n",
+                               (unsigned long long) le64toh(o->tag.seqnum));
+                        break;
+                }
 
-        /* Prepare HMAC for next cycle */
-        gcry_md_reset(f->hmac);
-        FSPRG_GetKey(fsprg_state(f), key, sizeof(key), 0);
-        gcry_md_setkey(f->hmac, key, sizeof(key));
+                if (o->object.flags & OBJECT_COMPRESSED)
+                        printf("Flags: COMPRESSED\n");
 
-        f->hmac_running = true;
+                if (p == le64toh(f->header->tail_object_offset))
+                        p = 0;
+                else
+                        p = p + ALIGN64(le64toh(o->object.size));
+        }
 
-        return 0;
+        return;
+fail:
+        log_error("File corrupt");
 }
 
-static int journal_file_get_epoch(JournalFile *f, uint64_t realtime, uint64_t *epoch) {
-        uint64_t t;
+/* Prints a human-readable summary of the journal file header to
+ * stdout. Each of the four IDs is formatted into its own buffer,
+ * since all formatted strings are consumed by a single printf()
+ * call and would otherwise overwrite each other. */
+void journal_file_print_header(JournalFile *f) {
+        char a[33], b[33], c[33], d[33];
+        char x[FORMAT_TIMESTAMP_MAX], y[FORMAT_TIMESTAMP_MAX];
 
         assert(f);
-        assert(epoch);
-        assert(f->authenticate);
 
-        if (le64toh(f->fsprg_header->fsprg_start_usec) == 0 ||
-            le64toh(f->fsprg_header->fsprg_interval_usec) == 0)
-                return -ENOTSUP;
-
-        if (realtime < le64toh(f->fsprg_header->fsprg_start_usec))
-                return -ESTALE;
+        printf("File Path: %s\n"
+               "File ID: %s\n"
+               "Machine ID: %s\n"
+               "Boot ID: %s\n"
+               "Sequential Number ID: %s\n"
+               "State: %s\n"
+               "Compatible Flags:%s%s\n"
+               "Incompatible Flags:%s%s\n"
+               "Header size: %llu\n"
+               "Arena size: %llu\n"
+               "Data Hash Table Size: %llu\n"
+               "Field Hash Table Size: %llu\n"
+               "Objects: %llu\n"
+               "Entry Objects: %llu\n"
+               "Rotate Suggested: %s\n"
+               "Head Sequential Number: %llu\n"
+               "Tail Sequential Number: %llu\n"
+               "Head Realtime Timestamp: %s\n"
+               "Tail Realtime Timestamp: %s\n",
+               f->path,
+               sd_id128_to_string(f->header->file_id, a),
+               sd_id128_to_string(f->header->machine_id, b),
+               sd_id128_to_string(f->header->boot_id, c),
+               sd_id128_to_string(f->header->seqnum_id, d),
+               f->header->state == STATE_OFFLINE ? "offline" :
+               f->header->state == STATE_ONLINE ? "online" :
+               f->header->state == STATE_ARCHIVED ? "archived" : "unknown",
+               (f->header->compatible_flags & HEADER_COMPATIBLE_AUTHENTICATED) ? " AUTHENTICATED" : "",
+               (f->header->compatible_flags & ~HEADER_COMPATIBLE_AUTHENTICATED) ? " ???" : "",
+               (f->header->incompatible_flags & HEADER_INCOMPATIBLE_COMPRESSED) ? " COMPRESSED" : "",
+               (f->header->incompatible_flags & ~HEADER_INCOMPATIBLE_COMPRESSED) ? " ???" : "",
+               (unsigned long long) le64toh(f->header->header_size),
+               (unsigned long long) le64toh(f->header->arena_size),
+               (unsigned long long) le64toh(f->header->data_hash_table_size) / sizeof(HashItem),
+               (unsigned long long) le64toh(f->header->field_hash_table_size) / sizeof(HashItem),
+               (unsigned long long) le64toh(f->header->n_objects),
+               (unsigned long long) le64toh(f->header->n_entries),
+               yes_no(journal_file_rotate_suggested(f)),
+               (unsigned long long) le64toh(f->header->head_entry_seqnum),
+               (unsigned long long) le64toh(f->header->tail_entry_seqnum),
+               format_timestamp(x, sizeof(x), le64toh(f->header->head_entry_realtime)),
+               format_timestamp(y, sizeof(y), le64toh(f->header->tail_entry_realtime)));
 
-        t = realtime - le64toh(f->fsprg_header->fsprg_start_usec);
-        t = t / le64toh(f->fsprg_header->fsprg_interval_usec);
+        if (JOURNAL_HEADER_CONTAINS(f->header, n_data))
+                printf("Data Objects: %llu\n"
+                       "Data Hash Table Fill: %.1f%%\n",
+                       (unsigned long long) le64toh(f->header->n_data),
+                       100.0 * (double) le64toh(f->header->n_data) / ((double) (le64toh(f->header->data_hash_table_size) / sizeof(HashItem))));
 
-        *epoch = t;
-        return 0;
+        if (JOURNAL_HEADER_CONTAINS(f->header, n_fields))
+                printf("Field Objects: %llu\n"
+                       "Field Hash Table Fill: %.1f%%\n",
+                       (unsigned long long) le64toh(f->header->n_fields),
+                       100.0 * (double) le64toh(f->header->n_fields) / ((double) (le64toh(f->header->field_hash_table_size) / sizeof(HashItem))));
 }
 
-static int journal_file_need_evolve(JournalFile *f, uint64_t realtime) {
-        uint64_t goal, epoch;
-        int r;
-        assert(f);
-
-        if (!f->authenticate)
-                return 0;
+int journal_file_open(
+                const char *fname,
+                int flags,
+                mode_t mode,
+                bool compress,
+                bool authenticate,
+                JournalMetrics *metrics,
+                MMapCache *mmap_cache,
+                JournalFile *template,
+                JournalFile **ret) {
 
-        r = journal_file_get_epoch(f, realtime, &goal);
-        if (r < 0)
-                return r;
+        JournalFile *f;
+        int r;
+        bool newly_created = false;
 
-        epoch = FSPRG_GetEpoch(fsprg_state(f));
-        if (epoch > goal)
-                return -ESTALE;
+        assert(fname);
 
-        return epoch != goal;
-}
+        if ((flags & O_ACCMODE) != O_RDONLY &&
+            (flags & O_ACCMODE) != O_RDWR)
+                return -EINVAL;
 
-static int journal_file_evolve(JournalFile *f, uint64_t realtime) {
-        uint64_t goal, epoch;
-        int r;
+        if (!endswith(fname, ".journal"))
+                return -EINVAL;
 
-        assert(f);
+        f = new0(JournalFile, 1);
+        if (!f)
+                return -ENOMEM;
 
-        if (!f->authenticate)
-                return 0;
+        f->fd = -1;
+        f->mode = mode;
 
-        r = journal_file_get_epoch(f, realtime, &goal);
-        if (r < 0)
-                return r;
+        f->flags = flags;
+        f->prot = prot_from_flags(flags);
+        f->writable = (flags & O_ACCMODE) != O_RDONLY;
+        f->compress = compress;
+        f->authenticate = authenticate;
 
-        epoch = FSPRG_GetEpoch(fsprg_state(f));
-        if (epoch < goal)
-                log_debug("Evolving FSPRG key from epoch %llu to %llu.", (unsigned long long) epoch, (unsigned long long) goal);
+        if (mmap_cache)
+                f->mmap = mmap_cache_ref(mmap_cache);
+        else {
+                /* One context for each type, plus the zeroth catchall
+                 * context. One fd for the file plus one for each type
+                 * (which we need during verification */
+                f->mmap = mmap_cache_new(_OBJECT_TYPE_MAX, 1 + _OBJECT_TYPE_MAX);
+                if (!f->mmap) {
+                        r = -ENOMEM;
+                        goto fail;
+                }
+        }
 
-        for (;;) {
-                if (epoch > goal)
-                        return -ESTALE;
-                if (epoch == goal)
-                        return 0;
+        f->path = strdup(fname);
+        if (!f->path) {
+                r = -ENOMEM;
+                goto fail;
+        }
 
-                FSPRG_Evolve(fsprg_state(f));
-                epoch = FSPRG_GetEpoch(fsprg_state(f));
+        f->fd = open(f->path, f->flags|O_CLOEXEC, f->mode);
+        if (f->fd < 0) {
+                r = -errno;
+                goto fail;
         }
-}
 
-static int journal_file_maybe_append_tag(JournalFile *f, uint64_t realtime) {
-        int r;
+        if (fstat(f->fd, &f->last_stat) < 0) {
+                r = -errno;
+                goto fail;
+        }
 
-        assert(f);
+        if (f->last_stat.st_size == 0 && f->writable) {
+                newly_created = true;
 
-        if (!f->authenticate)
-                return 0;
+                /* Try to load the FSPRG state, and if we can't, then
+                 * just don't do authentication */
+                r = journal_file_load_fsprg(f);
+                if (r < 0)
+                        f->authenticate = false;
 
-        r = journal_file_need_evolve(f, realtime);
-        if (r <= 0)
-                return 0;
+                r = journal_file_init_header(f, template);
+                if (r < 0)
+                        goto fail;
 
-        r = journal_file_append_tag(f);
-        if (r < 0)
-                return r;
+                if (fstat(f->fd, &f->last_stat) < 0) {
+                        r = -errno;
+                        goto fail;
+                }
+        }
 
-        r = journal_file_evolve(f, realtime);
-        if (r < 0)
-                return r;
+        if (f->last_stat.st_size < (off_t) HEADER_SIZE_MIN) {
+                r = -EIO;
+                goto fail;
+        }
 
-        r = journal_file_hmac_start(f);
-        if (r < 0)
-                return r;
+        f->header = mmap(NULL, PAGE_ALIGN(sizeof(Header)), prot_from_flags(flags), MAP_SHARED, f->fd, 0);
+        if (f->header == MAP_FAILED) {
+                f->header = NULL;
+                r = -errno;
+                goto fail;
+        }
 
-        return 0;
-}
+        if (!newly_created) {
+                r = journal_file_verify_header(f);
+                if (r < 0)
+                        goto fail;
+        }
 
-static int journal_file_hmac_put_object(JournalFile *f, int type, uint64_t p) {
-        int r;
-        Object *o;
-
-        assert(f);
-
-        if (!f->authenticate)
-                return 0;
-
-        r = journal_file_hmac_start(f);
-        if (r < 0)
-                return r;
-
-        r = journal_file_move_to_object(f, type, p, &o);
-        if (r < 0)
-                return r;
-
-        gcry_md_write(f->hmac, o, offsetof(ObjectHeader, payload));
-
-        switch (o->object.type) {
-
-        case OBJECT_DATA:
-                /* All but: hash and payload are mutable */
-                gcry_md_write(f->hmac, &o->data.hash, sizeof(o->data.hash));
-                gcry_md_write(f->hmac, o->data.payload, le64toh(o->object.size) - offsetof(DataObject, payload));
-                break;
-
-        case OBJECT_ENTRY:
-                /* All */
-                gcry_md_write(f->hmac, &o->entry.seqnum, le64toh(o->object.size) - offsetof(EntryObject, seqnum));
-                break;
-
-        case OBJECT_FIELD_HASH_TABLE:
-        case OBJECT_DATA_HASH_TABLE:
-        case OBJECT_ENTRY_ARRAY:
-                /* Nothing: everything is mutable */
-                break;
-
-        case OBJECT_TAG:
-                /* All but the tag itself */
-                gcry_md_write(f->hmac, &o->tag.seqnum, sizeof(o->tag.seqnum));
-                break;
-        default:
-                return -EINVAL;
-        }
-
-        return 0;
-}
-
-/* Adds the file header to the HMAC that is currently being
- * calculated, leaving out the fields listed below. Returns 0 right
- * away if authentication is disabled for this file, and a negative
- * errno-style error if the HMAC could not be started. */
-static int journal_file_hmac_put_header(JournalFile *f) {
-        Header *h;
-        int ret;
-
-        assert(f);
-
-        if (!f->authenticate)
-                return 0;
-
-        ret = journal_file_hmac_start(f);
-        if (ret < 0)
-                return ret;
-
-        h = f->header;
-
-        /* Left out: state+reserved, boot_id, arena_size,
-         * tail_object_offset, n_objects, n_entries, tail_seqnum,
-         * head_entry_realtime, tail_entry_realtime,
-         * tail_entry_monotonic, n_data, n_fields, header_tag.
-         *
-         * Each call below covers one contiguous run of header fields
-         * between two of the skipped ones. */
-
-        gcry_md_write(f->hmac, h->signature,
-                      offsetof(Header, state) - offsetof(Header, signature));
-        gcry_md_write(f->hmac, &h->file_id,
-                      offsetof(Header, boot_id) - offsetof(Header, file_id));
-        gcry_md_write(f->hmac, &h->seqnum_id,
-                      offsetof(Header, arena_size) - offsetof(Header, seqnum_id));
-        gcry_md_write(f->hmac, &h->data_hash_table_offset,
-                      offsetof(Header, tail_object_offset) - offsetof(Header, data_hash_table_offset));
-        gcry_md_write(f->hmac, &h->head_entry_seqnum,
-                      offsetof(Header, head_entry_realtime) - offsetof(Header, head_entry_seqnum));
-
-        return 0;
-}
-
-/* Maps the FSPRG state file of the local machine
- * (/var/log/journal/<machine-id>/fsprg) into memory and stores the
- * mapping in f->fsprg_header, with its total size in f->fsprg_size.
- *
- * Returns 0 right away if authentication is disabled for this file.
- * Otherwise returns 0 on success or a negative errno-style error:
- * -ENODATA if the file is too short, -EBADMSG if the header is
- * inconsistent, -EPROTONOSUPPORT if unknown incompatible flags are
- * set, -EHOSTDOWN if the file belongs to a different machine. */
-static int journal_file_load_fsprg(JournalFile *f) {
-        int r, fd = -1;
-        char *p = NULL;
-        struct stat st;
-        FSPRGHeader *m = NULL;
-        sd_id128_t machine;
-
-        assert(f);
-
-        if (!f->authenticate)
-                return 0;
-
-        r = sd_id128_get_machine(&machine);
-        if (r < 0)
-                return r;
-
-        if (asprintf(&p, "/var/log/journal/" SD_ID128_FORMAT_STR "/fsprg",
-                     SD_ID128_FORMAT_VAL(machine)) < 0)
-                return -ENOMEM;
-
-        fd = open(p, O_RDWR|O_CLOEXEC|O_NOCTTY, 0600);
-        if (fd < 0) {
-                /* Pick up errno before logging, since the logging
-                 * call might overwrite it */
-                r = -errno;
-                log_error("Failed to open %s: %m", p);
-                goto finish;
-        }
-
-        if (fstat(fd, &st) < 0) {
-                r = -errno;
-                goto finish;
-        }
-
-        if (st.st_size < (off_t) sizeof(FSPRGHeader)) {
-                r = -ENODATA;
-                goto finish;
-        }
-
-        /* First map only the fixed-size header read-only, to validate
-         * it and to learn the full size of the file contents */
-        m = mmap(NULL, PAGE_ALIGN(sizeof(FSPRGHeader)), PROT_READ, MAP_SHARED, fd, 0);
-        if (m == MAP_FAILED) {
-                m = NULL;
-                r = -errno;
-                goto finish;
-        }
-
-        if (memcmp(m->signature, FSPRG_HEADER_SIGNATURE, 8) != 0) {
-                r = -EBADMSG;
-                goto finish;
-        }
-
-        if (m->incompatible_flags != 0) {
-                r = -EPROTONOSUPPORT;
-                goto finish;
-        }
-
-        if (le64toh(m->header_size) < sizeof(FSPRGHeader)) {
-                r = -EBADMSG;
-                goto finish;
-        }
-
-        if (le64toh(m->state_size) != FSPRG_stateinbytes(m->secpar)) {
-                r = -EBADMSG;
-                goto finish;
-        }
-
-        f->fsprg_size = le64toh(m->header_size) + le64toh(m->state_size);
-        if ((uint64_t) st.st_size < f->fsprg_size) {
-                r = -ENODATA;
-                goto finish;
-        }
-
-        if (!sd_id128_equal(machine, m->machine_id)) {
-                r = -EHOSTDOWN;
-                goto finish;
-        }
-
-        if (le64toh(m->fsprg_start_usec) <= 0 ||
-            le64toh(m->fsprg_interval_usec) <= 0) {
-                r = -EBADMSG;
-                goto finish;
-        }
-
-        /* Now map header plus state writable; this is the mapping
-         * that is kept in the JournalFile object */
-        f->fsprg_header = mmap(NULL, PAGE_ALIGN(f->fsprg_size), PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
-        if (f->fsprg_header == MAP_FAILED) {
-                f->fsprg_header = NULL;
-                r = -errno;
-                goto finish;
-        }
-
-        r = 0;
-
-finish:
-        /* The temporary header mapping and the fd are released on
-         * both the success and the failure path */
-        if (m)
-                munmap(m, PAGE_ALIGN(sizeof(FSPRGHeader)));
-
-        if (fd >= 0)
-                close_nointr_nofail(fd);
-
-        free(p);
-        return r;
-}
-
-/* Allocates the SHA256 HMAC context of the file (f->hmac). Returns 0
- * right away if authentication is disabled for this file, and
- * -ENOTSUP if libgcrypt fails to open the context. */
-static int journal_file_setup_hmac(JournalFile *f) {
-        gcry_error_t err;
-
-        if (!f->authenticate)
-                return 0;
-
-        err = gcry_md_open(&f->hmac, GCRY_MD_SHA256, GCRY_MD_FLAG_HMAC);
-
-        return err != 0 ? -ENOTSUP : 0;
-}
-
-/* Calculates and appends the first tag of a file: the HMAC is fed
- * with the header, the field hash table object and the data hash
- * table object, and then a tag object is appended. Returns 0 right
- * away if authentication is disabled for this file, 0 on success and
- * a negative errno-style error otherwise. */
-static int journal_file_append_first_tag(JournalFile *f) {
-        uint64_t offset;
-        int ret;
-
-        if (!f->authenticate)
-                return 0;
-
-        log_debug("Calculating first tag...");
-
-        ret = journal_file_hmac_put_header(f);
-        if (ret < 0)
-                return ret;
-
-        /* The header records where the items array of each hash
-         * table begins; step back by the offset of the items array
-         * to get to the start of the enclosing object. */
-        offset = le64toh(f->header->field_hash_table_offset);
-        if (offset < offsetof(Object, hash_table.items))
-                return -EINVAL;
-
-        ret = journal_file_hmac_put_object(f, OBJECT_FIELD_HASH_TABLE,
-                                           offset - offsetof(Object, hash_table.items));
-        if (ret < 0)
-                return ret;
-
-        offset = le64toh(f->header->data_hash_table_offset);
-        if (offset < offsetof(Object, hash_table.items))
-                return -EINVAL;
-
-        ret = journal_file_hmac_put_object(f, OBJECT_DATA_HASH_TABLE,
-                                           offset - offsetof(Object, hash_table.items));
-        if (ret < 0)
-                return ret;
-
-        ret = journal_file_append_tag(f);
-
-        return ret < 0 ? ret : 0;
-}
-
-/* Runs superficial, per-object sanity checks on o: sizes and the
- * plausibility of a few field values. References to other objects
- * are not followed. Object types without a case below are accepted
- * as they are. Returns 0 if the object looks fine, -EBADMSG
- * otherwise. */
-static int journal_file_object_verify(JournalFile *f, Object *o) {
-        uint64_t size;
-
-        assert(f);
-        assert(o);
-
-        /* This does various superficial tests about the length and
-         * possible field values. It does not follow any references to
-         * other objects. */
-
-        /* All size checks compare against the size of the fixed part
-         * before subtracting it: the arithmetic is unsigned, so an
-         * undersized object would otherwise wrap around to a huge
-         * value and pass. */
-        size = le64toh(o->object.size);
-
-        switch (o->object.type) {
-        case OBJECT_DATA:
-                if (le64toh(o->data.entry_offset) <= 0 ||
-                    le64toh(o->data.n_entries) <= 0)
-                        return -EBADMSG;
-
-                /* The payload must not be empty */
-                if (size <= offsetof(DataObject, payload))
-                        return -EBADMSG;
-                break;
-
-        case OBJECT_FIELD:
-                /* The payload must not be empty */
-                if (size <= offsetof(FieldObject, payload))
-                        return -EBADMSG;
-                break;
-
-        case OBJECT_ENTRY:
-                /* At least one item ... */
-                if (size <= offsetof(EntryObject, items))
-                        return -EBADMSG;
-
-                /* ... and only complete items */
-                if ((size - offsetof(EntryObject, items)) % sizeof(EntryItem) != 0)
-                        return -EBADMSG;
-
-                if (le64toh(o->entry.seqnum) <= 0 ||
-                    le64toh(o->entry.realtime) <= 0)
-                        return -EBADMSG;
-
-                break;
-
-        case OBJECT_DATA_HASH_TABLE:
-        case OBJECT_FIELD_HASH_TABLE:
-                if (size < offsetof(HashTableObject, items) ||
-                    (size - offsetof(HashTableObject, items)) % sizeof(HashItem) != 0)
-                        return -EBADMSG;
-
-                break;
-
-        case OBJECT_ENTRY_ARRAY:
-                if (size < offsetof(EntryArrayObject, items) ||
-                    (size - offsetof(EntryArrayObject, items)) % sizeof(le64_t) != 0)
-                        return -EBADMSG;
-
-                break;
-
-        case OBJECT_TAG:
-                if (size != sizeof(TagObject))
-                        return -EBADMSG;
-                break;
-        }
-
-        return 0;
-}
-
-/* Draws a progress bar on stdout, but only if stdout is a TTY and
- * the previous redraw recorded in *last_usec is at least 40ms in the
- * past. p is the progress, scaled so that 65535 means 100%. On a
- * redraw *last_usec is updated to the current monotonic time. */
-static void draw_progress(uint64_t p, usec_t *last_usec) {
-        unsigned width, filled, blank, c;
-        usec_t t, previous;
-
-        if (!isatty(STDOUT_FILENO))
-                return;
-
-        t = now(CLOCK_MONOTONIC);
-        previous = *last_usec;
-
-        /* Rate limit the redraws */
-        if (previous != 0 && previous + 40 * USEC_PER_MSEC > t)
-                return;
-
-        *last_usec = t;
-
-        /* The bar takes up three quarters of the terminal width */
-        width = (3 * columns()) / 4;
-        filled = (width * (unsigned) p) / 65535ULL;
-        blank = width - filled;
-
-        /* Go to the start of the line and hide the cursor */
-        fputs("\r\x1B[?25l", stdout);
-
-        /* UTF-8 full block for the completed part ... */
-        c = 0;
-        while (c < filled) {
-                fputs("\xe2\x96\x88", stdout);
-                c++;
-        }
-
-        /* ... and UTF-8 light shade for the remainder */
-        c = 0;
-        while (c < blank) {
-                fputs("\xe2\x96\x91", stdout);
-                c++;
-        }
-
-        printf(" %3lu%%", 100LU * (unsigned long) p / 65535LU);
-
-        /* Back to the start of the line, show the cursor again */
-        fputs("\r\x1B[?25h", stdout);
-        fflush(stdout);
-}
-
-/* Erases the progress bar drawn by draw_progress() by overwriting it
- * with spaces. Does nothing if stdout is not a TTY. */
-static void flush_progress(void) {
-        unsigned remaining;
-
-        if (!isatty(STDOUT_FILENO))
-                return;
-
-        /* Width of the bar itself plus five more columns for the
-         * percentage that follows it */
-        remaining = (3 * columns()) / 4 + 5;
-
-        putchar('\r');
-
-        while (remaining > 0) {
-                putchar(' ');
-                remaining--;
-        }
-
-        putchar('\r');
-        fflush(stdout);
-}
-
-/* Writes the 64bit value p to fd, in host byte order. Returns 0 on
- * success, -errno if write() fails and -EIO on a short write. */
-static int write_uint64(int fd, uint64_t p) {
-        ssize_t n_written;
-
-        n_written = write(fd, &p, sizeof(p));
-
-        if (n_written < 0)
-                return -errno;
-
-        return n_written == sizeof(p) ? 0 : -EIO;
-}
-
-/* Looks up the value p in the file fd, which is accessed through the
- * mmap cache m and is expected to hold n host-endian uint64_t values
- * sorted in ascending order (the bisection depends on that order).
- *
- * Returns 1 if p was found, 0 if it was not, and a negative
- * errno-style error if mapping the file failed. */
-static int contains_uint64(MMapCache *m, int fd, uint64_t n, uint64_t p) {
-        uint64_t a, b;
-        int r;
-
-        assert(m);
-        assert(fd >= 0);
-
-        /* Bisection over the half-open index range [a, b) */
-
-        a = 0; b = n;
-        while (a < b) {
-                uint64_t c, *z;
-
-                /* Midpoint, calculated so that it cannot overflow */
-                c = a + (b - a) / 2;
-
-                r = mmap_cache_get(m, fd, PROT_READ, 0, c * sizeof(uint64_t), sizeof(uint64_t), (void **) &z);
-                if (r < 0)
-                        return r;
-
-                if (*z == p)
-                        return 1;
-
-                if (p < *z)
-                        b = c;
-                else
-                        /* Element c has been ruled out already, so
-                         * continue behind it. Continuing at c itself
-                         * would never terminate as soon as the range
-                         * has shrunk to a single element smaller
-                         * than p. */
-                        a = c + 1;
-        }
-
-        return 0;
-}
-
-/* Verifies the internal consistency of the journal file f.
- *
- * Two passes are made over all objects, from header_size up to
- * tail_object_offset. The first pass checks each object
- * superficially, feeds it to the HMAC, counts objects per type and
- * records the offsets of all data, entry and entry array objects in
- * unlinked temporary files below /var/tmp. The counters and the
- * timestamps/sequence numbers seen are then compared with the
- * header. The second pass checks that entry arrays only refer to
- * offsets recorded in the first pass.
- *
- * The key parameter is not used by this function body.
- *
- * Returns 0 if no problem was found, a negative errno-style error
- * otherwise (-EBADMSG for inconsistencies). */
-int journal_file_verify(JournalFile *f, const char *key) {
-        int r;
-        Object *o;
-        uint64_t p = 0;
-        uint64_t tag_seqnum = 0, entry_seqnum = 0, entry_monotonic = 0, entry_realtime = 0;
-        sd_id128_t entry_boot_id;
-        bool entry_seqnum_set = false, entry_monotonic_set = false, entry_realtime_set = false, found_main_entry_array = false;
-        uint64_t n_weird = 0, n_objects = 0, n_entries = 0, n_data = 0, n_fields = 0, n_data_hash_tables = 0, n_field_hash_tables = 0, n_entry_arrays = 0;
-        usec_t last_usec = 0;
-        int data_fd = -1, entry_fd = -1, entry_array_fd = -1;
-        char data_path[] = "/var/tmp/journal-data-XXXXXX",
-                entry_path[] = "/var/tmp/journal-entry-XXXXXX",
-                entry_array_path[] = "/var/tmp/journal-entry-array-XXXXXX";
-
-        assert(f);
-
-        /* The temporary files are unlinked right after creation, so
-         * they go away as soon as their fds are closed. On failure r
-         * is set from errno before logging, as the fail path returns
-         * r. */
-
-        data_fd = mkostemp(data_path, O_CLOEXEC);
-        if (data_fd < 0) {
-                r = -errno;
-                log_error("Failed to create data file: %m");
-                goto fail;
-        }
-        unlink(data_path);
-
-        entry_fd = mkostemp(entry_path, O_CLOEXEC);
-        if (entry_fd < 0) {
-                r = -errno;
-                log_error("Failed to create entry file: %m");
-                goto fail;
-        }
-        unlink(entry_path);
-
-        entry_array_fd = mkostemp(entry_array_path, O_CLOEXEC);
-        if (entry_array_fd < 0) {
-                r = -errno;
-                log_error("Failed to create entry array file: %m");
-                goto fail;
-        }
-        unlink(entry_array_path);
-
-        /* First iteration: we go through all objects, verify the
-         * superficial structure, headers, hashes. */
-
-        r = journal_file_hmac_put_header(f);
-        if (r < 0) {
-                log_error("Failed to calculate HMAC of header.");
-                goto fail;
-        }
-
-        p = le64toh(f->header->header_size);
-        while (p != 0) {
-                /* The first pass accounts for the lower half of the
-                 * progress range */
-                draw_progress((0x7FFF * p) / le64toh(f->header->tail_object_offset), &last_usec);
-
-                r = journal_file_move_to_object(f, -1, p, &o);
-                if (r < 0) {
-                        log_error("Invalid object at %llu", (unsigned long long) p);
-                        goto fail;
-                }
-
-                if (le64toh(f->header->tail_object_offset) < p) {
-                        log_error("Invalid tail object pointer.");
-                        r = -EBADMSG;
-                        goto fail;
-                }
-
-                n_objects ++;
-
-                r = journal_file_object_verify(f, o);
-                if (r < 0) {
-                        log_error("Invalid object contents at %llu", (unsigned long long) p);
-                        goto fail;
-                }
-
-                r = journal_file_hmac_put_object(f, -1, p);
-                if (r < 0) {
-                        log_error("Failed to calculate HMAC at %llu", (unsigned long long) p);
-                        goto fail;
-                }
-
-                if (o->object.flags & OBJECT_COMPRESSED &&
-                    !(le32toh(f->header->incompatible_flags) & HEADER_INCOMPATIBLE_COMPRESSED)) {
-                        log_error("Compressed object without compression at %llu", (unsigned long long) p);
-                        r = -EBADMSG;
-                        goto fail;
-                }
-
-                if (o->object.flags & OBJECT_COMPRESSED &&
-                    o->object.type != OBJECT_DATA) {
-                        log_error("Compressed non-data object at %llu", (unsigned long long) p);
-                        r = -EBADMSG;
-                        goto fail;
-                }
-
-                if (o->object.type == OBJECT_TAG) {
-
-                        if (!(le32toh(f->header->compatible_flags) & HEADER_COMPATIBLE_AUTHENTICATED)) {
-                                log_error("Tag object without authentication at %llu", (unsigned long long) p);
-                                r = -EBADMSG;
-                                goto fail;
-                        }
-
-                        /* NOTE(review): tag_seqnum is never changed
-                         * in this function, so every tag is compared
-                         * against 0 -- confirm whether that is
-                         * intended */
-                        if (le64toh(o->tag.seqnum) != tag_seqnum) {
-                                log_error("Tag sequence number out of synchronization at %llu", (unsigned long long) p);
-                                r = -EBADMSG;
-                                goto fail;
-                        }
-
-                } else if (o->object.type == OBJECT_ENTRY) {
-
-                        r = write_uint64(entry_fd, p);
-                        if (r < 0)
-                                goto fail;
-
-                        /* The first entry has to match the head
-                         * fields of the header */
-                        if (!entry_seqnum_set &&
-                            le64toh(o->entry.seqnum) != le64toh(f->header->head_entry_seqnum)) {
-                                log_error("Head entry sequence number incorrect");
-                                r = -EBADMSG;
-                                goto fail;
-                        }
-
-                        /* Sequence numbers have to increase strictly */
-                        if (entry_seqnum_set &&
-                            entry_seqnum >= le64toh(o->entry.seqnum)) {
-                                log_error("Entry sequence number out of synchronization at %llu", (unsigned long long) p);
-                                r = -EBADMSG;
-                                goto fail;
-                        }
-
-                        entry_seqnum = le64toh(o->entry.seqnum);
-                        entry_seqnum_set = true;
-
-                        /* Monotonic timestamps are only compared
-                         * between entries of the same boot */
-                        if (entry_monotonic_set &&
-                            sd_id128_equal(entry_boot_id, o->entry.boot_id) &&
-                            entry_monotonic > le64toh(o->entry.monotonic)) {
-                                log_error("Entry timestamp out of synchronization at %llu", (unsigned long long) p);
-                                r = -EBADMSG;
-                                goto fail;
-                        }
-
-                        entry_monotonic = le64toh(o->entry.monotonic);
-                        entry_boot_id = o->entry.boot_id;
-                        entry_monotonic_set = true;
-
-                        if (!entry_realtime_set &&
-                            le64toh(o->entry.realtime) != le64toh(f->header->head_entry_realtime)) {
-                                log_error("Head entry realtime timestamp incorrect");
-                                r = -EBADMSG;
-                                goto fail;
-                        }
-
-                        entry_realtime = le64toh(o->entry.realtime);
-                        entry_realtime_set = true;
-
-                        n_entries ++;
-                } else if (o->object.type == OBJECT_ENTRY_ARRAY) {
-
-                        r = write_uint64(entry_array_fd, p);
-                        if (r < 0)
-                                goto fail;
-
-                        if (p == le64toh(f->header->entry_array_offset)) {
-                                if (found_main_entry_array) {
-                                        log_error("More than one main entry array at %llu", (unsigned long long) p);
-                                        r = -EBADMSG;
-                                        goto fail;
-                                }
-
-                                found_main_entry_array = true;
-                        }
-
-                        n_entry_arrays++;
-
-                } else if (o->object.type == OBJECT_DATA) {
-
-                        r = write_uint64(data_fd, p);
-                        if (r < 0)
-                                goto fail;
-
-                        n_data++;
-
-                } else if (o->object.type == OBJECT_FIELD)
-                        n_fields++;
-                else if (o->object.type == OBJECT_DATA_HASH_TABLE) {
-                        n_data_hash_tables++;
-
-                        if (n_data_hash_tables > 1) {
-                                log_error("More than one data hash table at %llu", (unsigned long long) p);
-                                r = -EBADMSG;
-                                goto fail;
-                        }
-
-                        if (le64toh(f->header->data_hash_table_offset) != p + offsetof(HashTableObject, items) ||
-                            le64toh(f->header->data_hash_table_size) != le64toh(o->object.size) - offsetof(HashTableObject, items)) {
-                                log_error("Header fields for data hash table invalid.");
-                                r = -EBADMSG;
-                                goto fail;
-                        }
-                } else if (o->object.type == OBJECT_FIELD_HASH_TABLE) {
-                        n_field_hash_tables++;
-
-                        if (n_field_hash_tables > 1) {
-                                log_error("More than one field hash table at %llu", (unsigned long long) p);
-                                r = -EBADMSG;
-                                goto fail;
-                        }
-
-                        if (le64toh(f->header->field_hash_table_offset) != p + offsetof(HashTableObject, items) ||
-                            le64toh(f->header->field_hash_table_size) != le64toh(o->object.size) - offsetof(HashTableObject, items)) {
-                                log_error("Header fields for field hash table invalid.");
-                                r = -EBADMSG;
-                                goto fail;
-                        }
-                } else if (o->object.type >= _OBJECT_TYPE_MAX)
-                        n_weird ++;
-
-                /* The tail object ends the iteration */
-                if (p == le64toh(f->header->tail_object_offset))
-                        p = 0;
-                else
-                        p = p + ALIGN64(le64toh(o->object.size));
-        }
-
-        /* Compare what we counted and saw with the header */
-
-        if (n_objects != le64toh(f->header->n_objects)) {
-                log_error("Object number mismatch");
-                r = -EBADMSG;
-                goto fail;
-        }
-
-        if (n_entries != le64toh(f->header->n_entries)) {
-                log_error("Entry number mismatch");
-                r = -EBADMSG;
-                goto fail;
-        }
-
-        if (JOURNAL_HEADER_CONTAINS(f->header, n_data) &&
-            n_data != le64toh(f->header->n_data)) {
-                log_error("Data number mismatch");
-                r = -EBADMSG;
-                goto fail;
-        }
-
-        if (JOURNAL_HEADER_CONTAINS(f->header, n_fields) &&
-            n_fields != le64toh(f->header->n_fields)) {
-                log_error("Field number mismatch");
-                r = -EBADMSG;
-                goto fail;
-        }
-
-        if (JOURNAL_HEADER_CONTAINS(f->header, n_tags) &&
-            tag_seqnum != le64toh(f->header->n_tags)) {
-                log_error("Tag number mismatch");
-                r = -EBADMSG;
-                goto fail;
-        }
-
-        if (n_data_hash_tables != 1) {
-                log_error("Missing data hash table");
-                r = -EBADMSG;
-                goto fail;
-        }
-
-        if (n_field_hash_tables != 1) {
-                log_error("Missing field hash table");
-                r = -EBADMSG;
-                goto fail;
-        }
-
-        if (!found_main_entry_array) {
-                log_error("Missing entry array");
-                r = -EBADMSG;
-                goto fail;
-        }
-
-        if (entry_seqnum_set &&
-            entry_seqnum != le64toh(f->header->tail_entry_seqnum)) {
-                log_error("Invalid tail seqnum");
-                r = -EBADMSG;
-                goto fail;
-        }
-
-        if (entry_monotonic_set &&
-            (!sd_id128_equal(entry_boot_id, f->header->boot_id) ||
-             entry_monotonic != le64toh(f->header->tail_entry_monotonic))) {
-                log_error("Invalid tail monotonic timestamp");
-                r = -EBADMSG;
-                goto fail;
-        }
-
-        if (entry_realtime_set && entry_realtime != le64toh(f->header->tail_entry_realtime)) {
-                log_error("Invalid tail realtime timestamp");
-                r = -EBADMSG;
-                goto fail;
-        }
-
-        /* Second iteration: we go through all objects again, this
-         * time verify all pointers. */
-
-        p = le64toh(f->header->header_size);
-        while (p != 0) {
-                /* The second pass accounts for the upper half of the
-                 * progress range */
-                draw_progress(0x8000 + (0x7FFF * p) / le64toh(f->header->tail_object_offset), &last_usec);
-
-                r = journal_file_move_to_object(f, -1, p, &o);
-                if (r < 0) {
-                        log_error("Invalid object at %llu", (unsigned long long) p);
-                        goto fail;
-                }
-
-                if (o->object.type == OBJECT_ENTRY_ARRAY) {
-                        uint64_t i = 0, n;
-
-                        /* NOTE(review): contains_uint64() returns a
-                         * negative value on error, which the
-                         * negations below treat the same as "found"
-                         * -- confirm whether errors should be
-                         * propagated instead */
-
-                        if (le64toh(o->entry_array.next_entry_array_offset) != 0 &&
-                            !contains_uint64(f->mmap, entry_array_fd, n_entry_arrays, le64toh(o->entry_array.next_entry_array_offset))) {
-                                log_error("Entry array chains up to invalid next array at %llu", (unsigned long long) p);
-                                r = -EBADMSG;
-                                goto fail;
-                        }
-
-                        n = journal_file_entry_array_n_items(o);
-                        for (i = 0; i < n; i++) {
-                                if (le64toh(o->entry_array.items[i]) != 0 &&
-                                    !contains_uint64(f->mmap, entry_fd, n_entries, le64toh(o->entry_array.items[i]))) {
-
-                                        log_error("Entry array points to invalid next array at %llu", (unsigned long long) p);
-                                        r = -EBADMSG;
-                                        goto fail;
-                                }
-                        }
-
-                }
-
-                /* Look up the object again before its size is read
-                 * below. NOTE(review): presumably because the
-                 * lookups above go through the same mmap cache and
-                 * might invalidate o -- confirm */
-                r = journal_file_move_to_object(f, -1, p, &o);
-                if (r < 0) {
-                        log_error("Invalid object at %llu", (unsigned long long) p);
-                        goto fail;
-                }
-
-                if (p == le64toh(f->header->tail_object_offset))
-                        p = 0;
-                else
-                        p = p + ALIGN64(le64toh(o->object.size));
-        }
-
-        flush_progress();
-
-        mmap_cache_close_fd(f->mmap, data_fd);
-        mmap_cache_close_fd(f->mmap, entry_fd);
-        mmap_cache_close_fd(f->mmap, entry_array_fd);
-
-        close_nointr_nofail(data_fd);
-        close_nointr_nofail(entry_fd);
-        close_nointr_nofail(entry_array_fd);
-
-        return 0;
-
-fail:
-        flush_progress();
-
-        /* p is the offset of the object that was being looked at
-         * when the problem was noticed */
-        log_error("File corruption detected at %s:%llu (of %llu, %llu%%).",
-                  f->path,
-                  (unsigned long long) p,
-                  (unsigned long long) f->last_stat.st_size,
-                  (unsigned long long) (100 * p / f->last_stat.st_size));
-
-        if (data_fd >= 0) {
-                mmap_cache_close_fd(f->mmap, data_fd);
-                close_nointr_nofail(data_fd);
-        }
-
-        if (entry_fd >= 0) {
-                mmap_cache_close_fd(f->mmap, entry_fd);
-                close_nointr_nofail(entry_fd);
-        }
-
-        if (entry_array_fd >= 0) {
-                mmap_cache_close_fd(f->mmap, entry_array_fd);
-                close_nointr_nofail(entry_array_fd);
-        }
-
-        return r;
-}
-
-/* Prints the header of the journal file f to stdout, followed by the
- * type (and for some types a few fields) of every object, walking
- * from header_size up to tail_object_offset. If an object cannot be
- * accessed the walk stops and "File corrupt" is logged. */
-void journal_file_dump(JournalFile *f) {
-        Object *o;
-        int r;
-        uint64_t p;
-
-        assert(f);
-
-        journal_file_print_header(f);
-
-        p = le64toh(f->header->header_size);
-        while (p != 0) {
-                r = journal_file_move_to_object(f, -1, p, &o);
-                if (r < 0)
-                        goto fail;
-
-                switch (o->object.type) {
-
-                case OBJECT_UNUSED:
-                        printf("Type: OBJECT_UNUSED\n");
-                        break;
-
-                case OBJECT_DATA:
-                        printf("Type: OBJECT_DATA\n");
-                        break;
-
-                case OBJECT_FIELD:
-                        printf("Type: OBJECT_FIELD\n");
-                        break;
-
-                case OBJECT_ENTRY:
-                        printf("Type: OBJECT_ENTRY %llu %llu %llu\n",
-                               (unsigned long long) le64toh(o->entry.seqnum),
-                               (unsigned long long) le64toh(o->entry.monotonic),
-                               (unsigned long long) le64toh(o->entry.realtime));
-                        break;
-
-                case OBJECT_FIELD_HASH_TABLE:
-                        printf("Type: OBJECT_FIELD_HASH_TABLE\n");
-                        break;
-
-                case OBJECT_DATA_HASH_TABLE:
-                        printf("Type: OBJECT_DATA_HASH_TABLE\n");
-                        break;
-
-                case OBJECT_ENTRY_ARRAY:
-                        printf("Type: OBJECT_ENTRY_ARRAY\n");
-                        break;
-
-                case OBJECT_TAG:
-                        printf("Type: OBJECT_TAG %llu\n",
-                               (unsigned long long) le64toh(o->tag.seqnum));
-                        break;
-
-                default:
-                        /* Make sure every object results in a type
-                         * line, so that the flags line below can
-                         * always be attributed */
-                        printf("Type: unknown (%u)\n", (unsigned) o->object.type);
-                        break;
-                }
-
-                if (o->object.flags & OBJECT_COMPRESSED)
-                        printf("Flags: COMPRESSED\n");
-
-                /* The tail object ends the iteration */
-                if (p == le64toh(f->header->tail_object_offset))
-                        p = 0;
-                else
-                        p = p + ALIGN64(le64toh(o->object.size));
-        }
-
-        return;
-fail:
-        log_error("File corrupt");
-}
-
-/* Prints a human readable summary of the header of the journal file
- * f to stdout. The data and field object statistics are only shown
- * if the header of the file is large enough to include them. */
-void journal_file_print_header(JournalFile *f) {
-        /* One buffer per ID: all four strings are formatted before
-         * printf() runs, so sharing a buffer between two of them
-         * would make both show the same value */
-        char a[33], b[33], c[33], d[33];
-        char x[FORMAT_TIMESTAMP_MAX], y[FORMAT_TIMESTAMP_MAX];
-
-        assert(f);
-
-        printf("File Path: %s\n"
-               "File ID: %s\n"
-               "Machine ID: %s\n"
-               "Boot ID: %s\n"
-               "Sequential Number ID: %s\n"
-               "State: %s\n"
-               "Compatible Flags:%s%s\n"
-               "Incompatible Flags:%s%s\n"
-               "Header size: %llu\n"
-               "Arena size: %llu\n"
-               "Data Hash Table Size: %llu\n"
-               "Field Hash Table Size: %llu\n"
-               "Objects: %llu\n"
-               "Entry Objects: %llu\n"
-               "Rotate Suggested: %s\n"
-               "Head Sequential Number: %llu\n"
-               "Tail Sequential Number: %llu\n"
-               "Head Realtime Timestamp: %s\n"
-               "Tail Realtime Timestamp: %s\n",
-               f->path,
-               sd_id128_to_string(f->header->file_id, a),
-               sd_id128_to_string(f->header->machine_id, b),
-               sd_id128_to_string(f->header->boot_id, c),
-               sd_id128_to_string(f->header->seqnum_id, d),
-               f->header->state == STATE_OFFLINE ? "offline" :
-               f->header->state == STATE_ONLINE ? "online" :
-               f->header->state == STATE_ARCHIVED ? "archived" : "unknown",
-               /* The flag fields are converted from little endian
-                * before testing them, like everywhere else */
-               (le32toh(f->header->compatible_flags) & HEADER_COMPATIBLE_AUTHENTICATED) ? " AUTHENTICATED" : "",
-               (le32toh(f->header->compatible_flags) & ~HEADER_COMPATIBLE_AUTHENTICATED) ? " ???" : "",
-               (le32toh(f->header->incompatible_flags) & HEADER_INCOMPATIBLE_COMPRESSED) ? " COMPRESSED" : "",
-               (le32toh(f->header->incompatible_flags) & ~HEADER_INCOMPATIBLE_COMPRESSED) ? " ???" : "",
-               (unsigned long long) le64toh(f->header->header_size),
-               (unsigned long long) le64toh(f->header->arena_size),
-               (unsigned long long) le64toh(f->header->data_hash_table_size) / sizeof(HashItem),
-               (unsigned long long) le64toh(f->header->field_hash_table_size) / sizeof(HashItem),
-               (unsigned long long) le64toh(f->header->n_objects),
-               (unsigned long long) le64toh(f->header->n_entries),
-               yes_no(journal_file_rotate_suggested(f)),
-               (unsigned long long) le64toh(f->header->head_entry_seqnum),
-               (unsigned long long) le64toh(f->header->tail_entry_seqnum),
-               format_timestamp(x, sizeof(x), le64toh(f->header->head_entry_realtime)),
-               format_timestamp(y, sizeof(y), le64toh(f->header->tail_entry_realtime)));
-
-        /* The fill level is the number of objects relative to the
-         * number of buckets of the respective hash table */
-
-        if (JOURNAL_HEADER_CONTAINS(f->header, n_data))
-                printf("Data Objects: %llu\n"
-                       "Data Hash Table Fill: %.1f%%\n",
-                       (unsigned long long) le64toh(f->header->n_data),
-                       100.0 * (double) le64toh(f->header->n_data) / ((double) (le64toh(f->header->data_hash_table_size) / sizeof(HashItem))));
-
-        if (JOURNAL_HEADER_CONTAINS(f->header, n_fields))
-                printf("Field Objects: %llu\n"
-                       "Field Hash Table Fill: %.1f%%\n",
-                       (unsigned long long) le64toh(f->header->n_fields),
-                       100.0 * (double) le64toh(f->header->n_fields) / ((double) (le64toh(f->header->field_hash_table_size) / sizeof(HashItem))));
-}
-
-int journal_file_open(
-                const char *fname,
-                int flags,
-                mode_t mode,
-                bool compress,
-                bool authenticate,
-                JournalMetrics *metrics,
-                MMapCache *mmap_cache,
-                JournalFile *template,
-                JournalFile **ret) {
-
-        JournalFile *f;
-        int r;
-        bool newly_created = false;
-
-        assert(fname);
-
-        if ((flags & O_ACCMODE) != O_RDONLY &&
-            (flags & O_ACCMODE) != O_RDWR)
-                return -EINVAL;
-
-        if (!endswith(fname, ".journal"))
-                return -EINVAL;
-
-        f = new0(JournalFile, 1);
-        if (!f)
-                return -ENOMEM;
-
-        f->fd = -1;
-        f->mode = mode;
-
-        f->flags = flags;
-        f->prot = prot_from_flags(flags);
-        f->writable = (flags & O_ACCMODE) != O_RDONLY;
-        f->compress = compress;
-        f->authenticate = authenticate;
-
-        if (mmap_cache)
-                f->mmap = mmap_cache_ref(mmap_cache);
-        else {
-                /* One context for each type, plus the zeroth catchall
-                 * context. One fd for the file plus one for each type
-                 * (which we need during verification) */
-                f->mmap = mmap_cache_new(_OBJECT_TYPE_MAX, 1 + _OBJECT_TYPE_MAX);
-                if (!f->mmap) {
-                        r = -ENOMEM;
-                        goto fail;
-                }
-        }
-
-        f->path = strdup(fname);
-        if (!f->path) {
-                r = -ENOMEM;
-                goto fail;
-        }
-
-        f->fd = open(f->path, f->flags|O_CLOEXEC, f->mode);
-        if (f->fd < 0) {
-                r = -errno;
-                goto fail;
-        }
-
-        if (fstat(f->fd, &f->last_stat) < 0) {
-                r = -errno;
-                goto fail;
-        }
-
-        if (f->last_stat.st_size == 0 && f->writable) {
-                newly_created = true;
-
-                /* Try to load the FSPRG state, and if we can't, then
-                 * just don't do authentication */
-                r = journal_file_load_fsprg(f);
-                if (r < 0)
-                        f->authenticate = false;
-
-                r = journal_file_init_header(f, template);
-                if (r < 0)
-                        goto fail;
-
-                if (fstat(f->fd, &f->last_stat) < 0) {
-                        r = -errno;
-                        goto fail;
-                }
-        }
-
-        if (f->last_stat.st_size < (off_t) HEADER_SIZE_MIN) {
-                r = -EIO;
-                goto fail;
-        }
-
-        f->header = mmap(NULL, PAGE_ALIGN(sizeof(Header)), prot_from_flags(flags), MAP_SHARED, f->fd, 0);
-        if (f->header == MAP_FAILED) {
-                f->header = NULL;
-                r = -errno;
-                goto fail;
-        }
-
-        if (!newly_created) {
-                r = journal_file_verify_header(f);
-                if (r < 0)
-                        goto fail;
-        }
-
-        if (!newly_created && f->writable) {
-                r = journal_file_load_fsprg(f);
-                if (r < 0)
-                        goto fail;
-        }
+        if (!newly_created && f->writable) {
+                r = journal_file_load_fsprg(f);
+                if (r < 0)
+                        goto fail;
+        }
 
         if (f->writable) {
                 if (metrics) {
@@ -3139,203 +2199,6 @@ int journal_file_open_reliably(
         return journal_file_open(fname, flags, mode, compress, authenticate, metrics, mmap, template, ret);
 }
 
-struct vacuum_info {
-        off_t usage;
-        char *filename;
-
-        uint64_t realtime;
-        sd_id128_t seqnum_id;
-        uint64_t seqnum;
-
-        bool have_seqnum;
-};
-
-static int vacuum_compare(const void *_a, const void *_b) {
-        const struct vacuum_info *a, *b;
-
-        a = _a;
-        b = _b;
-
-        if (a->have_seqnum && b->have_seqnum &&
-            sd_id128_equal(a->seqnum_id, b->seqnum_id)) {
-                if (a->seqnum < b->seqnum)
-                        return -1;
-                else if (a->seqnum > b->seqnum)
-                        return 1;
-                else
-                        return 0;
-        }
-
-        if (a->realtime < b->realtime)
-                return -1;
-        else if (a->realtime > b->realtime)
-                return 1;
-        else if (a->have_seqnum && b->have_seqnum)
-                return memcmp(&a->seqnum_id, &b->seqnum_id, 16);
-        else
-                return strcmp(a->filename, b->filename);
-}
-
-int journal_directory_vacuum(const char *directory, uint64_t max_use, uint64_t min_free) {
-        DIR *d;
-        int r = 0;
-        struct vacuum_info *list = NULL;
-        unsigned n_list = 0, n_allocated = 0, i;
-        uint64_t sum = 0;
-
-        assert(directory);
-
-        if (max_use <= 0)
-                return 0;
-
-        d = opendir(directory);
-        if (!d)
-                return -errno;
-
-        for (;;) {
-                int k;
-                struct dirent buf, *de;
-                size_t q;
-                struct stat st;
-                char *p;
-                unsigned long long seqnum = 0, realtime;
-                sd_id128_t seqnum_id;
-                bool have_seqnum;
-
-                k = readdir_r(d, &buf, &de);
-                if (k != 0) {
-                        r = -k;
-                        goto finish;
-                }
-
-                if (!de)
-                        break;
-
-                if (fstatat(dirfd(d), de->d_name, &st, AT_SYMLINK_NOFOLLOW) < 0)
-                        continue;
-
-                if (!S_ISREG(st.st_mode))
-                        continue;
-
-                q = strlen(de->d_name);
-
-                if (endswith(de->d_name, ".journal")) {
-
-                        /* Vacuum archived files */
-
-                        if (q < 1 + 32 + 1 + 16 + 1 + 16 + 8)
-                                continue;
-
-                        if (de->d_name[q-8-16-1] != '-' ||
-                            de->d_name[q-8-16-1-16-1] != '-' ||
-                            de->d_name[q-8-16-1-16-1-32-1] != '@')
-                                continue;
-
-                        p = strdup(de->d_name);
-                        if (!p) {
-                                r = -ENOMEM;
-                                goto finish;
-                        }
-
-                        de->d_name[q-8-16-1-16-1] = 0;
-                        if (sd_id128_from_string(de->d_name + q-8-16-1-16-1-32, &seqnum_id) < 0) {
-                                free(p);
-                                continue;
-                        }
-
-                        if (sscanf(de->d_name + q-8-16-1-16, "%16llx-%16llx.journal", &seqnum, &realtime) != 2) {
-                                free(p);
-                                continue;
-                        }
-
-                        have_seqnum = true;
-
-                } else if (endswith(de->d_name, ".journal~")) {
-                        unsigned long long tmp;
-
-                        /* Vacuum corrupted files */
-
-                        if (q < 1 + 16 + 1 + 16 + 8 + 1)
-                                continue;
-
-                        if (de->d_name[q-1-8-16-1] != '-' ||
-                            de->d_name[q-1-8-16-1-16-1] != '@')
-                                continue;
-
-                        p = strdup(de->d_name);
-                        if (!p) {
-                                r = -ENOMEM;
-                                goto finish;
-                        }
-
-                        if (sscanf(de->d_name + q-1-8-16-1-16, "%16llx-%16llx.journal~", &realtime, &tmp) != 2) {
-                                free(p);
-                                continue;
-                        }
-
-                        have_seqnum = false;
-                } else
-                        continue;
-
-                if (n_list >= n_allocated) {
-                        struct vacuum_info *j;
-
-                        n_allocated = MAX(n_allocated * 2U, 8U);
-                        j = realloc(list, n_allocated * sizeof(struct vacuum_info));
-                        if (!j) {
-                                free(p);
-                                r = -ENOMEM;
-                                goto finish;
-                        }
-
-                        list = j;
-                }
-
-                list[n_list].filename = p;
-                list[n_list].usage = 512UL * (uint64_t) st.st_blocks;
-                list[n_list].seqnum = seqnum;
-                list[n_list].realtime = realtime;
-                list[n_list].seqnum_id = seqnum_id;
-                list[n_list].have_seqnum = have_seqnum;
-
-                sum += list[n_list].usage;
-
-                n_list ++;
-        }
-
-        if (n_list > 0)
-                qsort(list, n_list, sizeof(struct vacuum_info), vacuum_compare);
-
-        for(i = 0; i < n_list; i++) {
-                struct statvfs ss;
-
-                if (fstatvfs(dirfd(d), &ss) < 0) {
-                        r = -errno;
-                        goto finish;
-                }
-
-                if (sum <= max_use &&
-                    (uint64_t) ss.f_bavail * (uint64_t) ss.f_bsize >= min_free)
-                        break;
-
-                if (unlinkat(dirfd(d), list[i].filename, 0) >= 0) {
-                        log_info("Deleted archived journal %s/%s.", directory, list[i].filename);
-                        sum -= list[i].usage;
-                } else if (errno != ENOENT)
-                        log_warning("Failed to delete %s/%s: %m", directory, list[i].filename);
-        }
-
-finish:
-        for (i = 0; i < n_list; i++)
-                free(list[i].filename);
-
-        free(list);
-
-        if (d)
-                closedir(d);
-
-        return r;
-}
 
 int journal_file_copy_entry(JournalFile *from, JournalFile *to, Object *o, uint64_t p, uint64_t *seqnum, Object **ret, uint64_t *offset) {
         uint64_t i, n;
index 0305c97..aba3d9a 100644 (file)
@@ -107,10 +107,17 @@ int journal_file_open_reliably(
                 JournalFile *template,
                 JournalFile **ret);
 
+#define ALIGN64(x) (((x) + 7ULL) & ~7ULL) /* Rounds x up to the next multiple of 8 */
+/* True if the header_size stored in header h is large enough to fully include the given Header field */
+#define JOURNAL_HEADER_CONTAINS(h, field) \
+        (le64toh((h)->header_size) >= offsetof(Header, field) + sizeof((h)->field))
+
 int journal_file_move_to_object(JournalFile *f, int type, uint64_t offset, Object **ret);
 
 uint64_t journal_file_entry_n_items(Object *o);
+uint64_t journal_file_entry_array_n_items(Object *o);
 
+int journal_file_append_object(JournalFile *f, int type, uint64_t size, Object **ret, uint64_t *offset);
 int journal_file_append_entry(JournalFile *f, const dual_timestamp *ts, const struct iovec iovec[], unsigned n_iovec, uint64_t *seqno, Object **ret, uint64_t *offset);
 
 int journal_file_find_data_object(JournalFile *f, const void *data, uint64_t size, Object **ret, uint64_t *offset);
@@ -138,8 +145,6 @@ void journal_file_print_header(JournalFile *f);
 
 int journal_file_rotate(JournalFile **f, bool compress, bool authenticate);
 
-int journal_directory_vacuum(const char *directory, uint64_t max_use, uint64_t min_free);
-
 void journal_file_post_change(JournalFile *f);
 
 void journal_default_metrics(JournalMetrics *m, int fd);
@@ -148,7 +153,3 @@ int journal_file_get_cutoff_realtime_usec(JournalFile *f, usec_t *from, usec_t *
 int journal_file_get_cutoff_monotonic_usec(JournalFile *f, sd_id128_t boot, usec_t *from, usec_t *to);
 
 bool journal_file_rotate_suggested(JournalFile *f);
-
-int journal_file_append_tag(JournalFile *f);
-
-int journal_file_verify(JournalFile *f, const char *key);
diff --git a/src/journal/journal-vacuum.c b/src/journal/journal-vacuum.c
new file mode 100644 (file)
index 0000000..ff2cd33
--- /dev/null
@@ -0,0 +1,230 @@
+/*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
+
+/***
+  This file is part of systemd.
+
+  Copyright 2011 Lennart Poettering
+
+  systemd is free software; you can redistribute it and/or modify it
+  under the terms of the GNU Lesser General Public License as published by
+  the Free Software Foundation; either version 2.1 of the License, or
+  (at your option) any later version.
+
+  systemd is distributed in the hope that it will be useful, but
+  WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+  Lesser General Public License for more details.
+
+  You should have received a copy of the GNU Lesser General Public License
+  along with systemd; If not, see <http://www.gnu.org/licenses/>.
+***/
+
+#include <sys/types.h>
+#include <fcntl.h>
+#include <sys/stat.h>
+#include <sys/statvfs.h>
+#include <unistd.h>
+
+#include "journal-def.h"
+#include "journal-file.h"
+#include "journal-vacuum.h"
+#include "sd-id128.h"
+#include "util.h"
+
+struct vacuum_info {
+        off_t usage; /* Disk usage in bytes, taken from st_blocks * 512 */
+        char *filename; /* Heap copy of the directory entry name, freed by journal_directory_vacuum() */
+        /* The fields below are parsed from the file name */
+        uint64_t realtime; /* Hex timestamp field of the name */
+        sd_id128_t seqnum_id; /* Only set for archived ".journal" files */
+        uint64_t seqnum; /* Only parsed for archived ".journal" files, 0 otherwise */
+        /* True for archived ".journal" files, false for ".journal~" files */
+        bool have_seqnum;
+};
+
+static int vacuum_compare(const void *_a, const void *_b) {
+        const struct vacuum_info *a, *b;
+        /* qsort() comparator for struct vacuum_info: sorts ascending, so that the lowest seqnum/realtime comes first */
+        a = _a;
+        b = _b;
+        /* Sequence numbers are only comparable if both entries carry one and share the same seqnum ID */
+        if (a->have_seqnum && b->have_seqnum &&
+            sd_id128_equal(a->seqnum_id, b->seqnum_id)) {
+                if (a->seqnum < b->seqnum)
+                        return -1;
+                else if (a->seqnum > b->seqnum)
+                        return 1;
+                else
+                        return 0;
+        }
+        /* Otherwise order by realtime; ties are broken by seqnum ID if both have one, else by file name */
+        if (a->realtime < b->realtime)
+                return -1;
+        else if (a->realtime > b->realtime)
+                return 1;
+        else if (a->have_seqnum && b->have_seqnum)
+                return memcmp(&a->seqnum_id, &b->seqnum_id, 16);
+        else
+                return strcmp(a->filename, b->filename);
+}
+
+int journal_directory_vacuum(const char *directory, uint64_t max_use, uint64_t min_free) {
+        DIR *d;
+        int r = 0;
+        struct vacuum_info *list = NULL;
+        unsigned n_list = 0, n_allocated = 0, i;
+        uint64_t sum = 0; /* Total usage in bytes of all candidate files collected in list[] */
+        /* Deletes ".journal"/".journal~" files matching the patterns below, in sorted order, until usage <= max_use and free space >= min_free. Returns 0 or a negative errno. */
+        assert(directory);
+
+        if (max_use <= 0) /* Nothing is deleted when max_use is 0 */
+                return 0;
+
+        d = opendir(directory);
+        if (!d)
+                return -errno;
+
+        for (;;) { /* Pass 1: collect all candidate files of the directory in list[] */
+                int k;
+                struct dirent buf, *de;
+                size_t q; /* Length of the entry name */
+                struct stat st;
+                char *p; /* Heap copy of the unmodified entry name; ownership moves into list[] */
+                unsigned long long seqnum = 0, realtime;
+                sd_id128_t seqnum_id;
+                bool have_seqnum;
+
+                k = readdir_r(d, &buf, &de);
+                if (k != 0) {
+                        r = -k;
+                        goto finish;
+                }
+
+                if (!de) /* End of directory reached */
+                        break;
+
+                if (fstatat(dirfd(d), de->d_name, &st, AT_SYMLINK_NOFOLLOW) < 0) /* Entries that cannot be stat'ed are skipped */
+                        continue;
+
+                if (!S_ISREG(st.st_mode)) /* Only regular files are candidates; symlinks are not followed */
+                        continue;
+
+                q = strlen(de->d_name);
+
+                if (endswith(de->d_name, ".journal")) {
+
+                        /* Vacuum archived files */
+                        /* The name must end in: @<32 char seqnum ID>-<16 hex seqnum>-<16 hex realtime>.journal */
+                        if (q < 1 + 32 + 1 + 16 + 1 + 16 + 8)
+                                continue;
+
+                        if (de->d_name[q-8-16-1] != '-' ||
+                            de->d_name[q-8-16-1-16-1] != '-' ||
+                            de->d_name[q-8-16-1-16-1-32-1] != '@')
+                                continue;
+
+                        p = strdup(de->d_name); /* Copy first, as d_name is modified in place right below */
+                        if (!p) {
+                                r = -ENOMEM;
+                                goto finish;
+                        }
+
+                        de->d_name[q-8-16-1-16-1] = 0; /* Cut the name after the seqnum ID so that only the ID is parsed */
+                        if (sd_id128_from_string(de->d_name + q-8-16-1-16-1-32, &seqnum_id) < 0) {
+                                free(p);
+                                continue;
+                        }
+
+                        if (sscanf(de->d_name + q-8-16-1-16, "%16llx-%16llx.journal", &seqnum, &realtime) != 2) {
+                                free(p);
+                                continue;
+                        }
+
+                        have_seqnum = true;
+
+                } else if (endswith(de->d_name, ".journal~")) {
+                        unsigned long long tmp; /* Second hex field of the name; parsed but not used */
+
+                        /* Vacuum corrupted files */
+                        /* The name must end in: @<16 hex realtime>-<16 hex value>.journal~ */
+                        if (q < 1 + 16 + 1 + 16 + 8 + 1)
+                                continue;
+
+                        if (de->d_name[q-1-8-16-1] != '-' ||
+                            de->d_name[q-1-8-16-1-16-1] != '@')
+                                continue;
+
+                        p = strdup(de->d_name);
+                        if (!p) {
+                                r = -ENOMEM;
+                                goto finish;
+                        }
+
+                        if (sscanf(de->d_name + q-1-8-16-1-16, "%16llx-%16llx.journal~", &realtime, &tmp) != 2) {
+                                free(p);
+                                continue;
+                        }
+
+                        have_seqnum = false; /* seqnum stays 0 and seqnum_id stays unset for these files */
+                } else
+                        continue;
+
+                if (n_list >= n_allocated) { /* Grow list[]: double the capacity, starting at 8 entries */
+                        struct vacuum_info *j;
+
+                        n_allocated = MAX(n_allocated * 2U, 8U);
+                        j = realloc(list, n_allocated * sizeof(struct vacuum_info));
+                        if (!j) {
+                                free(p);
+                                r = -ENOMEM;
+                                goto finish;
+                        }
+
+                        list = j;
+                }
+
+                list[n_list].filename = p;
+                list[n_list].usage = 512UL * (uint64_t) st.st_blocks; /* st_blocks is converted to bytes with a factor of 512 */
+                list[n_list].seqnum = seqnum;
+                list[n_list].realtime = realtime;
+                list[n_list].seqnum_id = seqnum_id;
+                list[n_list].have_seqnum = have_seqnum;
+
+                sum += list[n_list].usage;
+
+                n_list ++;
+        }
+
+        if (n_list > 0)
+                qsort(list, n_list, sizeof(struct vacuum_info), vacuum_compare);
+
+        for(i = 0; i < n_list; i++) { /* Pass 2: delete files in sorted order until both limits are met */
+                struct statvfs ss;
+
+                if (fstatvfs(dirfd(d), &ss) < 0) { /* Free space is re-read on every iteration */
+                        r = -errno;
+                        goto finish;
+                }
+
+                if (sum <= max_use &&
+                    (uint64_t) ss.f_bavail * (uint64_t) ss.f_bsize >= min_free)
+                        break;
+
+                if (unlinkat(dirfd(d), list[i].filename, 0) >= 0) {
+                        log_info("Deleted archived journal %s/%s.", directory, list[i].filename);
+                        sum -= list[i].usage;
+                } else if (errno != ENOENT) /* A file that is already gone is skipped without a warning */
+                        log_warning("Failed to delete %s/%s: %m", directory, list[i].filename);
+        }
+
+finish:
+        for (i = 0; i < n_list; i++)
+                free(list[i].filename);
+
+        free(list);
+
+        if (d)
+                closedir(d);
+
+        return r;
+}
diff --git a/src/journal/journal-vacuum.h b/src/journal/journal-vacuum.h
new file mode 100644 (file)
index 0000000..9841d72
--- /dev/null
@@ -0,0 +1,26 @@
+/*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
+
+#pragma once
+
+/***
+  This file is part of systemd.
+
+  Copyright 2011 Lennart Poettering
+
+  systemd is free software; you can redistribute it and/or modify it
+  under the terms of the GNU Lesser General Public License as published by
+  the Free Software Foundation; either version 2.1 of the License, or
+  (at your option) any later version.
+
+  systemd is distributed in the hope that it will be useful, but
+  WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+  Lesser General Public License for more details.
+
+  You should have received a copy of the GNU Lesser General Public License
+  along with systemd; If not, see <http://www.gnu.org/licenses/>.
+***/
+
+#include <inttypes.h>
+
+int journal_directory_vacuum(const char *directory, uint64_t max_use, uint64_t min_free); /* Returns 0 or a negative errno; does nothing if max_use is 0 */
diff --git a/src/journal/journal-verify.c b/src/journal/journal-verify.c
new file mode 100644 (file)
index 0000000..f3182e8
--- /dev/null
@@ -0,0 +1,558 @@
+/*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
+
+/***
+  This file is part of systemd.
+
+  Copyright 2012 Lennart Poettering
+
+  systemd is free software; you can redistribute it and/or modify it
+  under the terms of the GNU Lesser General Public License as published by
+  the Free Software Foundation; either version 2.1 of the License, or
+  (at your option) any later version.
+
+  systemd is distributed in the hope that it will be useful, but
+  WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+  Lesser General Public License for more details.
+
+  You should have received a copy of the GNU Lesser General Public License
+  along with systemd; If not, see <http://www.gnu.org/licenses/>.
+***/
+
+#include <unistd.h>
+#include <sys/mman.h>
+#include <fcntl.h>
+
+#include "util.h"
+#include "macro.h"
+#include "journal-def.h"
+#include "journal-file.h"
+#include "journal-authenticate.h"
+#include "journal-verify.h"
+
+static int journal_file_object_verify(JournalFile *f, Object *o) {
+        assert(f);
+        assert(o);
+
+        /* This does various superficial tests about the length and
+         * possible field values. It does not follow any references to
+         * other objects. Returns 0 if the object looks sane, -EBADMSG if not. */
+
+        switch (o->object.type) {
+        case OBJECT_DATA:
+                if (le64toh(o->data.entry_offset) <= 0 ||
+                    le64toh(o->data.n_entries) <= 0)
+                        return -EBADMSG;
+                /* Sizes are compared, never subtracted first: an unsigned difference wraps around for undersized objects */
+                if (le64toh(o->object.size) <= offsetof(DataObject, payload))
+                        return -EBADMSG;
+                break;
+
+        case OBJECT_FIELD:
+                if (le64toh(o->object.size) <= offsetof(FieldObject, payload))
+                        return -EBADMSG;
+                break;
+
+        case OBJECT_ENTRY:
+                if (le64toh(o->object.size) < offsetof(EntryObject, items) + sizeof(EntryItem)) /* Needs at least one item */
+                        return -EBADMSG;
+
+                if ((le64toh(o->object.size) - offsetof(EntryObject, items)) % sizeof(EntryItem) != 0)
+                        return -EBADMSG;
+
+                if (le64toh(o->entry.seqnum) <= 0 ||
+                    le64toh(o->entry.realtime) <= 0)
+                        return -EBADMSG;
+
+                break;
+
+        case OBJECT_DATA_HASH_TABLE:
+        case OBJECT_FIELD_HASH_TABLE:
+                if (le64toh(o->object.size) < offsetof(HashTableObject, items) ||
+                    (le64toh(o->object.size) - offsetof(HashTableObject, items)) % sizeof(HashItem) != 0)
+                        return -EBADMSG;
+                break;
+
+        case OBJECT_ENTRY_ARRAY:
+                if (le64toh(o->object.size) < offsetof(EntryArrayObject, items) ||
+                    (le64toh(o->object.size) - offsetof(EntryArrayObject, items)) % sizeof(le64_t) != 0)
+                        return -EBADMSG;
+                break;
+
+        case OBJECT_TAG:
+                if (le64toh(o->object.size) != sizeof(TagObject))
+                        return -EBADMSG;
+                break;
+        }
+
+        return 0;
+}
+
+static void draw_progress(uint64_t p, usec_t *last_usec) {
+        unsigned n, i, j, k;
+        usec_t z, x;
+        /* Draws a progress bar plus percentage for p on stdout; *last_usec stores the time of the last redraw */
+        if (!isatty(STDOUT_FILENO)) /* No progress bar unless stdout is a terminal */
+                return;
+
+        z = now(CLOCK_MONOTONIC);
+        x = *last_usec;
+
+        if (x != 0 && x + 40 * USEC_PER_MSEC > z) /* Rate limit: skip if the last redraw was less than 40ms ago */
+                return;
+
+        *last_usec = z;
+
+        n = (3 * columns()) / 4; /* Bar width: three quarters of what columns() reports */
+        j = (n * (unsigned) p) / 65535ULL; /* Filled cells; p is treated as a fraction of 65535 */
+        k = n - j; /* Remaining, unfilled cells */
+
+        fputs("\r\x1B[?25l", stdout); /* Carriage return, then the escape sequence that hides the cursor */
+
+        for (i = 0; i < j; i++)
+                fputs("\xe2\x96\x88", stdout); /* UTF-8 encoding of U+2588 (full block) */
+
+        for (i = 0; i < k; i++)
+                fputs("\xe2\x96\x91", stdout); /* UTF-8 encoding of U+2591 (light shade) */
+
+        printf(" %3lu%%", 100LU * (unsigned long) p / 65535LU);
+
+        fputs("\r\x1B[?25h", stdout); /* Carriage return, then the escape sequence that shows the cursor again */
+        fflush(stdout);
+}
+
+static void flush_progress(void) {
+        unsigned n, i;
+        /* Blanks out the line that draw_progress() painted, if stdout is a terminal */
+        if (!isatty(STDOUT_FILENO))
+                return;
+
+        n = (3 * columns()) / 4; /* Same bar width as computed in draw_progress() */
+
+        putchar('\r');
+
+        for (i = 0; i < n + 5; i++) /* The 5 extra cells cover the " %3lu%%" suffix printed after the bar */
+                putchar(' ');
+
+        putchar('\r');
+        fflush(stdout);
+}
+
+static int write_uint64(int fd, uint64_t p) {
+        ssize_t k;
+        /* Writes the raw in-memory bytes of p to fd. Returns 0 on success, -errno on failure, -EIO on a short write. */
+        k = write(fd, &p, sizeof(p));
+        if (k < 0)
+                return -errno;
+        if (k != sizeof(p)) /* A partial write is treated as an error and is not retried */
+                return -EIO;
+
+        return 0;
+}
+
+static int contains_uint64(MMapCache *m, int fd, uint64_t n, uint64_t p) {
+        uint64_t a, b;
+        int r;
+
+        assert(m);
+        assert(fd >= 0);
+
+        /* Bisection over the half-open index range [a, b) of the n 64-bit values stored in fd, which must be
+         * sorted ascending. Returns 1 if p is found, 0 if not, or the negative error of mmap_cache_get(). */
+        a = 0; b = n;
+        while (a < b) {
+                uint64_t c, *z;
+
+                c = a + (b - a) / 2; /* Midpoint without the overflow risk of (a + b) / 2 */
+
+                r = mmap_cache_get(m, fd, PROT_READ, 0, c * sizeof(uint64_t), sizeof(uint64_t), (void **) &z);
+                if (r < 0)
+                        return r;
+
+                if (*z == p)
+                        return 1;
+                /* Index c is ruled out now. Continuing with "a = c" would never terminate once b == a + 1. */
+                if (p < *z)
+                        b = c;
+                else
+                        a = c + 1;
+        }
+
+        return 0;
+}
+
+int journal_file_verify(JournalFile *f, const char *key) {
+        int r;
+        Object *o;
+        uint64_t p = 0;
+        uint64_t tag_seqnum = 0, entry_seqnum = 0, entry_monotonic = 0, entry_realtime = 0;
+        sd_id128_t entry_boot_id;
+        bool entry_seqnum_set = false, entry_monotonic_set = false, entry_realtime_set = false, found_main_entry_array = false;
+        uint64_t n_weird = 0, n_objects = 0, n_entries = 0, n_data = 0, n_fields = 0, n_data_hash_tables = 0, n_field_hash_tables = 0, n_entry_arrays = 0;
+        usec_t last_usec = 0;
+        int data_fd = -1, entry_fd = -1, entry_array_fd = -1;
+        char data_path[] = "/var/tmp/journal-data-XXXXXX",
+                entry_path[] = "/var/tmp/journal-entry-XXXXXX",
+                entry_array_path[] = "/var/tmp/journal-entry-array-XXXXXX";
+
+        assert(f);
+
+        data_fd = mkostemp(data_path, O_CLOEXEC);
+        if (data_fd < 0) {
+                log_error("Failed to create data file: %m");
+                goto fail;
+        }
+        unlink(data_path);
+
+        entry_fd = mkostemp(entry_path, O_CLOEXEC);
+        if (entry_fd < 0) {
+                log_error("Failed to create entry file: %m");
+                goto fail;
+        }
+        unlink(entry_path);
+
+        entry_array_fd = mkostemp(entry_array_path, O_CLOEXEC);
+        if (entry_array_fd < 0) {
+                log_error("Failed to create entry array file: %m");
+                goto fail;
+        }
+        unlink(entry_array_path);
+
+        /* First iteration: we go through all objects, verify the
+         * superficial structure, headers, hashes. */
+
+        r = journal_file_hmac_put_header(f);
+        if (r < 0) {
+                log_error("Failed to calculate HMAC of header.");
+                goto fail;
+        }
+
+        p = le64toh(f->header->header_size);
+        while (p != 0) {
+                draw_progress((0x7FFF * p) / le64toh(f->header->tail_object_offset), &last_usec);
+
+                r = journal_file_move_to_object(f, -1, p, &o);
+                if (r < 0) {
+                        log_error("Invalid object at %llu", (unsigned long long) p);
+                        goto fail;
+                }
+
+                if (le64toh(f->header->tail_object_offset) < p) {
+                        log_error("Invalid tail object pointer.");
+                        r = -EBADMSG;
+                        goto fail;
+                }
+
+                n_objects ++;
+
+                r = journal_file_object_verify(f, o);
+                if (r < 0) {
+                        log_error("Invalid object contents at %llu", (unsigned long long) p);
+                        goto fail;
+                }
+
+                r = journal_file_hmac_put_object(f, -1, p);
+                if (r < 0) {
+                        log_error("Failed to calculate HMAC at %llu", (unsigned long long) p);
+                        goto fail;
+                }
+
+                if (o->object.flags & OBJECT_COMPRESSED &&
+                    !(le32toh(f->header->incompatible_flags) & HEADER_INCOMPATIBLE_COMPRESSED)) {
+                        log_error("Compressed object without compression at %llu", (unsigned long long) p);
+                        r = -EBADMSG;
+                        goto fail;
+                }
+
+                if (o->object.flags & OBJECT_COMPRESSED &&
+                    o->object.type != OBJECT_DATA) {
+                        log_error("Compressed non-data object at %llu", (unsigned long long) p);
+                        r = -EBADMSG;
+                        goto fail;
+                }
+
+                if (o->object.type == OBJECT_TAG) {
+
+                        if (!(le32toh(f->header->compatible_flags) & HEADER_COMPATIBLE_AUTHENTICATED)) {
+                                log_error("Tag object without authentication at %llu", (unsigned long long) p);
+                                r = -EBADMSG;
+                                goto fail;
+                        }
+
+                        if (le64toh(o->tag.seqnum) != tag_seqnum) {
+                                log_error("Tag sequence number out of synchronization at %llu", (unsigned long long) p);
+                                r = -EBADMSG;
+                                goto fail;
+                        }
+
+                } else if (o->object.type == OBJECT_ENTRY) {
+
+                        r = write_uint64(entry_fd, p);
+                        if (r < 0)
+                                goto fail;
+
+                        if (!entry_seqnum_set &&
+                            le64toh(o->entry.seqnum) != le64toh(f->header->head_entry_seqnum)) {
+                                log_error("Head entry sequence number incorrect");
+                                r = -EBADMSG;
+                                goto fail;
+                        }
+
+                        if (entry_seqnum_set &&
+                            entry_seqnum >= le64toh(o->entry.seqnum)) {
+                                log_error("Entry sequence number out of synchronization at %llu", (unsigned long long) p);
+                                r = -EBADMSG;
+                                goto fail;
+                        }
+
+                        entry_seqnum = le64toh(o->entry.seqnum);
+                        entry_seqnum_set = true;
+
+                        if (entry_monotonic_set &&
+                            sd_id128_equal(entry_boot_id, o->entry.boot_id) &&
+                            entry_monotonic > le64toh(o->entry.monotonic)) {
+                                log_error("Entry timestamp out of synchronization at %llu", (unsigned long long) p);
+                                r = -EBADMSG;
+                                goto fail;
+                        }
+
+                        entry_monotonic = le64toh(o->entry.monotonic);
+                        entry_boot_id = o->entry.boot_id;
+                        entry_monotonic_set = true;
+
+                        if (!entry_realtime_set &&
+                            le64toh(o->entry.realtime) != le64toh(f->header->head_entry_realtime)) {
+                                log_error("Head entry realtime timestamp incorrect");
+                                r = -EBADMSG;
+                                goto fail;
+                        }
+
+                        entry_realtime = le64toh(o->entry.realtime);
+                        entry_realtime_set = true;
+
+                        n_entries ++;
+                } else if (o->object.type == OBJECT_ENTRY_ARRAY) {
+
+                        r = write_uint64(entry_array_fd, p);
+                        if (r < 0)
+                                goto fail;
+
+                        if (p == le64toh(f->header->entry_array_offset)) {
+                                if (found_main_entry_array) {
+                                        log_error("More than one main entry array at %llu", (unsigned long long) p);
+                                        r = -EBADMSG;
+                                        goto fail;
+                                }
+
+                                found_main_entry_array = true;
+                        }
+
+                        n_entry_arrays++;
+
+                } else if (o->object.type == OBJECT_DATA) {
+
+                        r = write_uint64(data_fd, p);
+                        if (r < 0)
+                                goto fail;
+
+                        n_data++;
+
+                } else if (o->object.type == OBJECT_FIELD)
+                        n_fields++;
+                else if (o->object.type == OBJECT_DATA_HASH_TABLE) {
+                        n_data_hash_tables++;
+
+                        if (n_data_hash_tables > 1) {
+                                log_error("More than one data hash table at %llu", (unsigned long long) p);
+                                r = -EBADMSG;
+                                goto fail;
+                        }
+
+                        if (le64toh(f->header->data_hash_table_offset) != p + offsetof(HashTableObject, items) ||
+                            le64toh(f->header->data_hash_table_size) != le64toh(o->object.size) - offsetof(HashTableObject, items)) {
+                                log_error("Header fields for data hash table invalid.");
+                                r = -EBADMSG;
+                                goto fail;
+                        }
+                } else if (o->object.type == OBJECT_FIELD_HASH_TABLE) {
+                        n_field_hash_tables++;
+
+                        if (n_field_hash_tables > 1) {
+                                log_error("More than one field hash table at %llu", (unsigned long long) p);
+                                r = -EBADMSG;
+                                goto fail;
+                        }
+
+                        if (le64toh(f->header->field_hash_table_offset) != p + offsetof(HashTableObject, items) ||
+                            le64toh(f->header->field_hash_table_size) != le64toh(o->object.size) - offsetof(HashTableObject, items)) {
+                                log_error("Header fields for field hash table invalid.");
+                                r = -EBADMSG;
+                                goto fail;
+                        }
+                } else if (o->object.type >= _OBJECT_TYPE_MAX)
+                        n_weird ++;
+
+                if (p == le64toh(f->header->tail_object_offset))
+                        p = 0;
+                else
+                        p = p + ALIGN64(le64toh(o->object.size));
+        }
+
+        if (n_objects != le64toh(f->header->n_objects)) {
+                log_error("Object number mismatch");
+                r = -EBADMSG;
+                goto fail;
+        }
+
+        if (n_entries != le64toh(f->header->n_entries)) {
+                log_error("Entry number mismatch");
+                r = -EBADMSG;
+                goto fail;
+        }
+
+        if (JOURNAL_HEADER_CONTAINS(f->header, n_data) &&
+            n_data != le64toh(f->header->n_data)) {
+                log_error("Data number mismatch");
+                r = -EBADMSG;
+                goto fail;
+        }
+
+        if (JOURNAL_HEADER_CONTAINS(f->header, n_fields) &&
+            n_fields != le64toh(f->header->n_fields)) {
+                log_error("Field number mismatch");
+                r = -EBADMSG;
+                goto fail;
+        }
+
+        if (JOURNAL_HEADER_CONTAINS(f->header, n_tags) &&
+            tag_seqnum != le64toh(f->header->n_tags)) {
+                log_error("Tag number mismatch");
+                r = -EBADMSG;
+                goto fail;
+        }
+
+        if (n_data_hash_tables != 1) {
+                log_error("Missing data hash table");
+                r = -EBADMSG;
+                goto fail;
+        }
+
+        if (n_field_hash_tables != 1) {
+                log_error("Missing field hash table");
+                r = -EBADMSG;
+                goto fail;
+        }
+
+        if (!found_main_entry_array) {
+                log_error("Missing entry array");
+                r = -EBADMSG;
+                goto fail;
+        }
+
+        if (entry_seqnum_set &&
+            entry_seqnum != le64toh(f->header->tail_entry_seqnum)) {
+                log_error("Invalid tail seqnum");
+                r = -EBADMSG;
+                goto fail;
+        }
+
+        if (entry_monotonic_set &&
+            (!sd_id128_equal(entry_boot_id, f->header->boot_id) ||
+             entry_monotonic != le64toh(f->header->tail_entry_monotonic))) {
+                log_error("Invalid tail monotonic timestamp");
+                r = -EBADMSG;
+                goto fail;
+        }
+
+        if (entry_realtime_set && entry_realtime != le64toh(f->header->tail_entry_realtime)) {
+                log_error("Invalid tail realtime timestamp");
+                r = -EBADMSG;
+                goto fail;
+        }
+
+        /* Second iteration: we go through all objects again, this
+         * time verify all pointers. */
+
+        p = le64toh(f->header->header_size);
+        while (p != 0) {
+                draw_progress(0x8000 + (0x7FFF * p) / le64toh(f->header->tail_object_offset), &last_usec);
+
+                r = journal_file_move_to_object(f, -1, p, &o);
+                if (r < 0) {
+                        log_error("Invalid object at %llu", (unsigned long long) p);
+                        goto fail;
+                }
+
+                if (o->object.type == OBJECT_ENTRY_ARRAY) {
+                        uint64_t i = 0, n;
+
+                        if (le64toh(o->entry_array.next_entry_array_offset) != 0 &&
+                            !contains_uint64(f->mmap, entry_array_fd, n_entry_arrays, le64toh(o->entry_array.next_entry_array_offset))) {
+                                log_error("Entry array chains up to invalid next array at %llu", (unsigned long long) p);
+                                r = -EBADMSG;
+                                goto fail;
+                        }
+
+                        n = journal_file_entry_array_n_items(o);
+                        for (i = 0; i < n; i++) {
+                                if (le64toh(o->entry_array.items[i]) != 0 &&
+                                    !contains_uint64(f->mmap, entry_fd, n_entries, le64toh(o->entry_array.items[i]))) {
+
+                                        log_error("Entry array points to invalid next array at %llu", (unsigned long long) p);
+                                        r = -EBADMSG;
+                                        goto fail;
+                                }
+                        }
+
+                }
+
+                r = journal_file_move_to_object(f, -1, p, &o);
+                if (r < 0) {
+                        log_error("Invalid object at %llu", (unsigned long long) p);
+                        goto fail;
+                }
+
+                if (p == le64toh(f->header->tail_object_offset))
+                        p = 0;
+                else
+                        p = p + ALIGN64(le64toh(o->object.size));
+        }
+
+        flush_progress();
+
+        mmap_cache_close_fd(f->mmap, data_fd);
+        mmap_cache_close_fd(f->mmap, entry_fd);
+        mmap_cache_close_fd(f->mmap, entry_array_fd);
+
+        close_nointr_nofail(data_fd);
+        close_nointr_nofail(entry_fd);
+        close_nointr_nofail(entry_array_fd);
+
+        return 0;
+
+fail:
+        flush_progress();
+
+        log_error("File corruption detected at %s:%llu (of %llu, %llu%%).",
+                  f->path,
+                  (unsigned long long) p,
+                  (unsigned long long) f->last_stat.st_size,
+                  (unsigned long long) (100 * p / f->last_stat.st_size));
+
+        if (data_fd >= 0) {
+                mmap_cache_close_fd(f->mmap, data_fd);
+                close_nointr_nofail(data_fd);
+        }
+
+        if (entry_fd >= 0) {
+                mmap_cache_close_fd(f->mmap, entry_fd);
+                close_nointr_nofail(entry_fd);
+        }
+
+        if (entry_array_fd >= 0) {
+                mmap_cache_close_fd(f->mmap, entry_array_fd);
+                close_nointr_nofail(entry_array_fd);
+        }
+
+        return r;
+}
diff --git a/src/journal/journal-verify.h b/src/journal/journal-verify.h
new file mode 100644 (file)
index 0000000..3ebdd5e
--- /dev/null
@@ -0,0 +1,26 @@
+/*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
+
+#pragma once
+
+/***
+  This file is part of systemd.
+
+  Copyright 2011 Lennart Poettering
+
+  systemd is free software; you can redistribute it and/or modify it
+  under the terms of the GNU Lesser General Public License as published by
+  the Free Software Foundation; either version 2.1 of the License, or
+  (at your option) any later version.
+
+  systemd is distributed in the hope that it will be useful, but
+  WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+  Lesser General Public License for more details.
+
+  You should have received a copy of the GNU Lesser General Public License
+  along with systemd; If not, see <http://www.gnu.org/licenses/>.
+***/
+
+#include "journal-file.h"
+
+/* Verifies the on-disk consistency of the journal file f.
+ *
+ * Returns 0 if verification passed, or a negative errno-style value on
+ * failure (-EBADMSG when a consistency check fails; errors from moving to
+ * an object are passed through unchanged).
+ *
+ * NOTE(review): the handling of key is not visible from this declaration;
+ * the test program passes NULL -- confirm the exact semantics against the
+ * definition in journal-verify.c. */
+int journal_file_verify(JournalFile *f, const char *key);
index 8e09ff1..a70de06 100644 (file)
@@ -43,6 +43,7 @@
 #include "journal-internal.h"
 #include "fsprg.h"
 #include "journal-def.h"
+#include "journal-verify.h"
 
 #define DEFAULT_FSPRG_INTERVAL_USEC (15*USEC_PER_MINUTE)
 
index 384ed90..d431953 100644 (file)
@@ -47,6 +47,7 @@
 #include "list.h"
 #include "journal-rate-limit.h"
 #include "journal-internal.h"
+#include "journal-vacuum.h"
 #include "conf-parser.h"
 #include "journald.h"
 #include "virt.h"
diff --git a/src/journal/test-journal-verify.c b/src/journal/test-journal-verify.c
new file mode 100644 (file)
index 0000000..bada498
--- /dev/null
@@ -0,0 +1,78 @@
+/*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
+
+/***
+  This file is part of systemd.
+
+  Copyright 2012 Lennart Poettering
+
+  systemd is free software; you can redistribute it and/or modify it
+  under the terms of the GNU Lesser General Public License as published by
+  the Free Software Foundation; either version 2.1 of the License, or
+  (at your option) any later version.
+
+  systemd is distributed in the hope that it will be useful, but
+  WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+  Lesser General Public License for more details.
+
+  You should have received a copy of the GNU Lesser General Public License
+  along with systemd; If not, see <http://www.gnu.org/licenses/>.
+***/
+
+#include <stdio.h>
+#include <unistd.h>
+#include <fcntl.h>
+
+#include "util.h"
+#include "log.h"
+#include "journal-file.h"
+#include "journal-verify.h"
+
+#define N_ENTRIES 6000
+#define RANDOM_RANGE 77
+
+/* Test program: writes N_ENTRIES entries into a fresh journal file inside a
+ * temporary directory, closes it, reopens it read-only and runs
+ * journal_file_verify() on it. Every step is guarded by assert_se(), so any
+ * failure aborts the test. Returns 0 when everything succeeded. */
+int main(int argc, char *argv[]) {
+        char t[] = "/tmp/journal-XXXXXX";
+        unsigned n;
+        JournalFile *f;
+
+        log_set_max_level(LOG_DEBUG);
+
+        /* Work inside a private temporary directory, so that the relative
+         * file name used below does not clash with anything else. */
+        assert_se(mkdtemp(t));
+        assert_se(chdir(t) >= 0);
+
+        log_info("Generating...");
+
+        assert_se(journal_file_open("test.journal", O_RDWR|O_CREAT, 0666, true, true, NULL, NULL, NULL, &f) == 0);
+
+        for (n = 0; n < N_ENTRIES; n++) {
+                struct iovec iovec;
+                struct dual_timestamp ts;
+                char *test;
+
+                dual_timestamp_get(&ts);
+
+                /* asprintf() returns -1 on failure, which is non-zero and
+                 * would pass a plain truth check; compare against 0
+                 * explicitly. random() returns long, hence the cast to match
+                 * the %lu conversion (the value is never negative). */
+                assert_se(asprintf(&test, "RANDOM=%lu", (unsigned long) (random() % RANDOM_RANGE)) >= 0);
+
+                /* Each entry consists of exactly one field. */
+                iovec.iov_base = (void*) test;
+                iovec.iov_len = strlen(test);
+
+                assert_se(journal_file_append_entry(f, &ts, &iovec, 1, NULL, NULL, NULL) == 0);
+
+                free(test);
+        }
+
+        journal_file_close(f);
+
+        log_info("Verifying...");
+
+        /* Reopen read-only and verify without a key. */
+        assert_se(journal_file_open("test.journal", O_RDONLY, 0666, false, false, NULL, NULL, NULL, &f) == 0);
+        assert_se(journal_file_verify(f, NULL) >= 0);
+        journal_file_close(f);
+
+        log_info("Exiting...");
+
+        /* Remove the temporary directory created above. */
+        assert_se(rm_rf_dangerous(t, false, true, false) >= 0);
+
+        return 0;
+}
index 2fd19a7..05bb2ea 100644 (file)
 
 #include <systemd/sd-journal.h>
 
-#include "journal-file.h"
 #include "log.h"
+#include "journal-file.h"
+#include "journal-authenticate.h"
+#include "journal-vacuum.h"
 
 int main(int argc, char *argv[]) {
         dual_timestamp ts;