1 // Copyright 2015 The etcd Authors
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
7 // http://www.apache.org/licenses/LICENSE-2.0
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
21 pb "github.com/coreos/etcd/raft/raftpb"
24 // ErrCompacted is returned by Storage.Entries/Compact when a requested
25 // index is unavailable because it predates the last snapshot.
26 var ErrCompacted = errors.New("requested index is unavailable due to compaction")
28 // ErrSnapOutOfDate is returned by Storage.CreateSnapshot when a requested
29 // index is older than the existing snapshot.
30 var ErrSnapOutOfDate = errors.New("requested index is older than the existing snapshot")
32 // ErrUnavailable is returned by Storage interface when the requested log entries
34 var ErrUnavailable = errors.New("requested entry at index is unavailable")
36 // ErrSnapshotTemporarilyUnavailable is returned by the Storage interface when the required
37 // snapshot is temporarily unavailable.
38 var ErrSnapshotTemporarilyUnavailable = errors.New("snapshot is temporarily unavailable")
40 // Storage is an interface that may be implemented by the application
41 // to retrieve log entries from storage.
43 // If any Storage method returns an error, the raft instance will
44 // become inoperable and refuse to participate in elections; the
45 // application is responsible for cleanup and recovery in this case.
46 type Storage interface {
47 // InitialState returns the saved HardState and ConfState information.
48 InitialState() (pb.HardState, pb.ConfState, error)
49 // Entries returns a slice of log entries in the range [lo,hi).
50 // MaxSize limits the total size of the log entries returned, but
51 // Entries returns at least one entry if any.
52 Entries(lo, hi, maxSize uint64) ([]pb.Entry, error)
53 // Term returns the term of entry i, which must be in the range
54 // [FirstIndex()-1, LastIndex()]. The term of the entry before
55 // FirstIndex is retained for matching purposes even though the
56 // rest of that entry may not be available.
57 Term(i uint64) (uint64, error)
58 // LastIndex returns the index of the last entry in the log.
59 LastIndex() (uint64, error)
60 // FirstIndex returns the index of the first log entry that is
61 // possibly available via Entries (older entries have been incorporated
62 // into the latest Snapshot; if storage only contains the dummy entry the
63 // first log entry is not available).
64 FirstIndex() (uint64, error)
65 // Snapshot returns the most recent snapshot.
66 // If snapshot is temporarily unavailable, it should return ErrSnapshotTemporarilyUnavailable,
67 // so raft state machine could know that Storage needs some time to prepare
68 // snapshot and call Snapshot later.
69 Snapshot() (pb.Snapshot, error)
72 // MemoryStorage implements the Storage interface backed by an
74 type MemoryStorage struct {
75 // Protects access to all fields. Most methods of MemoryStorage are
76 // run on the raft goroutine, but Append() is run on an application
80 hardState pb.HardState
82 // ents[i] has raft log position i+snapshot.Metadata.Index
86 // NewMemoryStorage creates an empty MemoryStorage.
87 func NewMemoryStorage() *MemoryStorage {
88 return &MemoryStorage{
89 // When starting from scratch populate the list with a dummy entry at term zero.
90 ents: make([]pb.Entry, 1),
94 // InitialState implements the Storage interface.
95 func (ms *MemoryStorage) InitialState() (pb.HardState, pb.ConfState, error) {
96 return ms.hardState, ms.snapshot.Metadata.ConfState, nil
99 // SetHardState saves the current HardState.
100 func (ms *MemoryStorage) SetHardState(st pb.HardState) error {
107 // Entries implements the Storage interface.
108 func (ms *MemoryStorage) Entries(lo, hi, maxSize uint64) ([]pb.Entry, error) {
111 offset := ms.ents[0].Index
113 return nil, ErrCompacted
115 if hi > ms.lastIndex()+1 {
116 raftLogger.Panicf("entries' hi(%d) is out of bound lastindex(%d)", hi, ms.lastIndex())
118 // only contains dummy entries.
119 if len(ms.ents) == 1 {
120 return nil, ErrUnavailable
123 ents := ms.ents[lo-offset : hi-offset]
124 return limitSize(ents, maxSize), nil
127 // Term implements the Storage interface.
128 func (ms *MemoryStorage) Term(i uint64) (uint64, error) {
131 offset := ms.ents[0].Index
133 return 0, ErrCompacted
135 if int(i-offset) >= len(ms.ents) {
136 return 0, ErrUnavailable
138 return ms.ents[i-offset].Term, nil
141 // LastIndex implements the Storage interface.
142 func (ms *MemoryStorage) LastIndex() (uint64, error) {
145 return ms.lastIndex(), nil
148 func (ms *MemoryStorage) lastIndex() uint64 {
149 return ms.ents[0].Index + uint64(len(ms.ents)) - 1
152 // FirstIndex implements the Storage interface.
153 func (ms *MemoryStorage) FirstIndex() (uint64, error) {
156 return ms.firstIndex(), nil
159 func (ms *MemoryStorage) firstIndex() uint64 {
160 return ms.ents[0].Index + 1
163 // Snapshot implements the Storage interface.
164 func (ms *MemoryStorage) Snapshot() (pb.Snapshot, error) {
167 return ms.snapshot, nil
170 // ApplySnapshot overwrites the contents of this Storage object with
171 // those of the given snapshot.
172 func (ms *MemoryStorage) ApplySnapshot(snap pb.Snapshot) error {
176 //handle check for old snapshot being applied
177 msIndex := ms.snapshot.Metadata.Index
178 snapIndex := snap.Metadata.Index
179 if msIndex >= snapIndex {
180 return ErrSnapOutOfDate
184 ms.ents = []pb.Entry{{Term: snap.Metadata.Term, Index: snap.Metadata.Index}}
188 // CreateSnapshot makes a snapshot which can be retrieved with Snapshot() and
189 // can be used to reconstruct the state at that point.
190 // If any configuration changes have been made since the last compaction,
191 // the result of the last ApplyConfChange must be passed in.
192 func (ms *MemoryStorage) CreateSnapshot(i uint64, cs *pb.ConfState, data []byte) (pb.Snapshot, error) {
195 if i <= ms.snapshot.Metadata.Index {
196 return pb.Snapshot{}, ErrSnapOutOfDate
199 offset := ms.ents[0].Index
200 if i > ms.lastIndex() {
201 raftLogger.Panicf("snapshot %d is out of bound lastindex(%d)", i, ms.lastIndex())
204 ms.snapshot.Metadata.Index = i
205 ms.snapshot.Metadata.Term = ms.ents[i-offset].Term
207 ms.snapshot.Metadata.ConfState = *cs
209 ms.snapshot.Data = data
210 return ms.snapshot, nil
213 // Compact discards all log entries prior to compactIndex.
214 // It is the application's responsibility to not attempt to compact an index
215 // greater than raftLog.applied.
216 func (ms *MemoryStorage) Compact(compactIndex uint64) error {
219 offset := ms.ents[0].Index
220 if compactIndex <= offset {
223 if compactIndex > ms.lastIndex() {
224 raftLogger.Panicf("compact %d is out of bound lastindex(%d)", compactIndex, ms.lastIndex())
227 i := compactIndex - offset
228 ents := make([]pb.Entry, 1, 1+uint64(len(ms.ents))-i)
229 ents[0].Index = ms.ents[i].Index
230 ents[0].Term = ms.ents[i].Term
231 ents = append(ents, ms.ents[i+1:]...)
236 // Append the new entries to storage.
237 // TODO (xiangli): ensure the entries are continuous and
238 // entries[0].Index > ms.entries[0].Index
239 func (ms *MemoryStorage) Append(entries []pb.Entry) error {
240 if len(entries) == 0 {
247 first := ms.firstIndex()
248 last := entries[0].Index + uint64(len(entries)) - 1
250 // shortcut if there is no new entry.
254 // truncate compacted entries
255 if first > entries[0].Index {
256 entries = entries[first-entries[0].Index:]
259 offset := entries[0].Index - ms.ents[0].Index
261 case uint64(len(ms.ents)) > offset:
262 ms.ents = append([]pb.Entry{}, ms.ents[:offset]...)
263 ms.ents = append(ms.ents, entries...)
264 case uint64(len(ms.ents)) == offset:
265 ms.ents = append(ms.ents, entries...)
267 raftLogger.Panicf("missing log entry [last: %d, append at: %d]",
268 ms.lastIndex(), entries[0].Index)