// Copyright 2015 The etcd Authors // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. package raft import ( "errors" "sync" pb "github.com/coreos/etcd/raft/raftpb" ) // ErrCompacted is returned by Storage.Entries/Compact when a requested // index is unavailable because it predates the last snapshot. var ErrCompacted = errors.New("requested index is unavailable due to compaction") // ErrSnapOutOfDate is returned by Storage.CreateSnapshot when a requested // index is older than the existing snapshot. var ErrSnapOutOfDate = errors.New("requested index is older than the existing snapshot") // ErrUnavailable is returned by Storage interface when the requested log entries // are unavailable. var ErrUnavailable = errors.New("requested entry at index is unavailable") // ErrSnapshotTemporarilyUnavailable is returned by the Storage interface when the required // snapshot is temporarily unavailable. var ErrSnapshotTemporarilyUnavailable = errors.New("snapshot is temporarily unavailable") // Storage is an interface that may be implemented by the application // to retrieve log entries from storage. // // If any Storage method returns an error, the raft instance will // become inoperable and refuse to participate in elections; the // application is responsible for cleanup and recovery in this case. type Storage interface { // InitialState returns the saved HardState and ConfState information. InitialState() (pb.HardState, pb.ConfState, error) // Entries returns a slice of log entries in the range [lo,hi). // MaxSize limits the total size of the log entries returned, but // Entries returns at least one entry if any. Entries(lo, hi, maxSize uint64) ([]pb.Entry, error) // Term returns the term of entry i, which must be in the range // [FirstIndex()-1, LastIndex()]. The term of the entry before // FirstIndex is retained for matching purposes even though the // rest of that entry may not be available. Term(i uint64) (uint64, error) // LastIndex returns the index of the last entry in the log. LastIndex() (uint64, error) // FirstIndex returns the index of the first log entry that is // possibly available via Entries (older entries have been incorporated // into the latest Snapshot; if storage only contains the dummy entry the // first log entry is not available). FirstIndex() (uint64, error) // Snapshot returns the most recent snapshot. // If snapshot is temporarily unavailable, it should return ErrSnapshotTemporarilyUnavailable, // so raft state machine could know that Storage needs some time to prepare // snapshot and call Snapshot later. Snapshot() (pb.Snapshot, error) } // MemoryStorage implements the Storage interface backed by an // in-memory array. type MemoryStorage struct { // Protects access to all fields. Most methods of MemoryStorage are // run on the raft goroutine, but Append() is run on an application // goroutine. sync.Mutex hardState pb.HardState snapshot pb.Snapshot // ents[i] has raft log position i+snapshot.Metadata.Index ents []pb.Entry } // NewMemoryStorage creates an empty MemoryStorage. func NewMemoryStorage() *MemoryStorage { return &MemoryStorage{ // When starting from scratch populate the list with a dummy entry at term zero. ents: make([]pb.Entry, 1), } } // InitialState implements the Storage interface. func (ms *MemoryStorage) InitialState() (pb.HardState, pb.ConfState, error) { return ms.hardState, ms.snapshot.Metadata.ConfState, nil } // SetHardState saves the current HardState. func (ms *MemoryStorage) SetHardState(st pb.HardState) error { ms.Lock() defer ms.Unlock() ms.hardState = st return nil } // Entries implements the Storage interface. func (ms *MemoryStorage) Entries(lo, hi, maxSize uint64) ([]pb.Entry, error) { ms.Lock() defer ms.Unlock() offset := ms.ents[0].Index if lo <= offset { return nil, ErrCompacted } if hi > ms.lastIndex()+1 { raftLogger.Panicf("entries' hi(%d) is out of bound lastindex(%d)", hi, ms.lastIndex()) } // only contains dummy entries. if len(ms.ents) == 1 { return nil, ErrUnavailable } ents := ms.ents[lo-offset : hi-offset] return limitSize(ents, maxSize), nil } // Term implements the Storage interface. func (ms *MemoryStorage) Term(i uint64) (uint64, error) { ms.Lock() defer ms.Unlock() offset := ms.ents[0].Index if i < offset { return 0, ErrCompacted } if int(i-offset) >= len(ms.ents) { return 0, ErrUnavailable } return ms.ents[i-offset].Term, nil } // LastIndex implements the Storage interface. func (ms *MemoryStorage) LastIndex() (uint64, error) { ms.Lock() defer ms.Unlock() return ms.lastIndex(), nil } func (ms *MemoryStorage) lastIndex() uint64 { return ms.ents[0].Index + uint64(len(ms.ents)) - 1 } // FirstIndex implements the Storage interface. func (ms *MemoryStorage) FirstIndex() (uint64, error) { ms.Lock() defer ms.Unlock() return ms.firstIndex(), nil } func (ms *MemoryStorage) firstIndex() uint64 { return ms.ents[0].Index + 1 } // Snapshot implements the Storage interface. func (ms *MemoryStorage) Snapshot() (pb.Snapshot, error) { ms.Lock() defer ms.Unlock() return ms.snapshot, nil } // ApplySnapshot overwrites the contents of this Storage object with // those of the given snapshot. func (ms *MemoryStorage) ApplySnapshot(snap pb.Snapshot) error { ms.Lock() defer ms.Unlock() //handle check for old snapshot being applied msIndex := ms.snapshot.Metadata.Index snapIndex := snap.Metadata.Index if msIndex >= snapIndex { return ErrSnapOutOfDate } ms.snapshot = snap ms.ents = []pb.Entry{{Term: snap.Metadata.Term, Index: snap.Metadata.Index}} return nil } // CreateSnapshot makes a snapshot which can be retrieved with Snapshot() and // can be used to reconstruct the state at that point. // If any configuration changes have been made since the last compaction, // the result of the last ApplyConfChange must be passed in. func (ms *MemoryStorage) CreateSnapshot(i uint64, cs *pb.ConfState, data []byte) (pb.Snapshot, error) { ms.Lock() defer ms.Unlock() if i <= ms.snapshot.Metadata.Index { return pb.Snapshot{}, ErrSnapOutOfDate } offset := ms.ents[0].Index if i > ms.lastIndex() { raftLogger.Panicf("snapshot %d is out of bound lastindex(%d)", i, ms.lastIndex()) } ms.snapshot.Metadata.Index = i ms.snapshot.Metadata.Term = ms.ents[i-offset].Term if cs != nil { ms.snapshot.Metadata.ConfState = *cs } ms.snapshot.Data = data return ms.snapshot, nil } // Compact discards all log entries prior to compactIndex. // It is the application's responsibility to not attempt to compact an index // greater than raftLog.applied. func (ms *MemoryStorage) Compact(compactIndex uint64) error { ms.Lock() defer ms.Unlock() offset := ms.ents[0].Index if compactIndex <= offset { return ErrCompacted } if compactIndex > ms.lastIndex() { raftLogger.Panicf("compact %d is out of bound lastindex(%d)", compactIndex, ms.lastIndex()) } i := compactIndex - offset ents := make([]pb.Entry, 1, 1+uint64(len(ms.ents))-i) ents[0].Index = ms.ents[i].Index ents[0].Term = ms.ents[i].Term ents = append(ents, ms.ents[i+1:]...) ms.ents = ents return nil } // Append the new entries to storage. // TODO (xiangli): ensure the entries are continuous and // entries[0].Index > ms.entries[0].Index func (ms *MemoryStorage) Append(entries []pb.Entry) error { if len(entries) == 0 { return nil } ms.Lock() defer ms.Unlock() first := ms.firstIndex() last := entries[0].Index + uint64(len(entries)) - 1 // shortcut if there is no new entry. if last < first { return nil } // truncate compacted entries if first > entries[0].Index { entries = entries[first-entries[0].Index:] } offset := entries[0].Index - ms.ents[0].Index switch { case uint64(len(ms.ents)) > offset: ms.ents = append([]pb.Entry{}, ms.ents[:offset]...) ms.ents = append(ms.ents, entries...) case uint64(len(ms.ents)) == offset: ms.ents = append(ms.ents, entries...) default: raftLogger.Panicf("missing log entry [last: %d, append at: %d]", ms.lastIndex(), entries[0].Index) } return nil }