1 // Copyright (c) 2011 The LevelDB Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. See the AUTHORS file for names of contributors.
5 #include "leveldb/table.h"
7 #include "leveldb/cache.h"
8 #include "leveldb/comparator.h"
9 #include "leveldb/env.h"
10 #include "leveldb/filter_policy.h"
11 #include "leveldb/options.h"
12 #include "table/block.h"
13 #include "table/filter_block.h"
14 #include "table/format.h"
15 #include "table/two_level_iterator.h"
16 #include "util/coding.h"
// NOTE(review): the enclosing declaration is not visible in this listing.
// These look like members of Table::Rep, since Table::Open() and
// Table::ReadFilter() assign them through rep-> / rep_-> -- confirm against
// the full file, which may declare further members.
//
// File handle that the Table methods below pass to ReadBlock().
29 RandomAccessFile* file;
// Filter reader; set to nullptr in Table::Open() and created in ReadFilter().
31 FilterBlockReader* filter;
// Raw filter bytes; ReadFilter() stores a pointer here only when the block it
// read reports heap_allocated.
32 const char* filter_data;
34 BlockHandle metaindex_handle; // Handle to metaindex_block: saved from footer
// Opens an sstable stored in `file`, whose total length is `size` bytes.
//
// Steps visible here: reject files shorter than the footer, read and decode
// the footer from the tail of the file, read the index block the footer
// points at, then build the Rep and the Table and load filter metadata via
// ReadMeta().
//
// Returns Status::Corruption when `size` is smaller than
// Footer::kEncodedLength; a failed footer read or footer decode is returned
// unchanged. On the path shown, *table receives a newly allocated Table.
//
// NOTE(review): this listing is missing lines (e.g. the declarations of
// footer_input, footer and opt, whatever guards the code after the index
// block read, and the final return) -- confirm details against the full file.
38 Status Table::Open(const Options& options, RandomAccessFile* file,
39 uint64_t size, Table** table) {
41 if (size < Footer::kEncodedLength) {
42 return Status::Corruption("file is too short to be an sstable");
45 char footer_space[Footer::kEncodedLength];
// The footer occupies the last Footer::kEncodedLength bytes of the file.
47 Status s = file->Read(size - Footer::kEncodedLength, Footer::kEncodedLength,
48 &footer_input, footer_space);
49 if (!s.ok()) return s;
52 s = footer.DecodeFrom(&footer_input);
53 if (!s.ok()) return s;
55 // Read the index block
56 BlockContents index_block_contents;
// paranoid_checks turns on checksum verification for the index block read.
58 if (options.paranoid_checks) {
59 opt.verify_checksums = true;
61 s = ReadBlock(file, opt, footer.index_handle(), &index_block_contents);
64 // We've successfully read the footer and the index block: we're
65 // ready to serve requests.
66 Block* index_block = new Block(index_block_contents);
67 Rep* rep = new Table::Rep;
68 rep->options = options;
70 rep->metaindex_handle = footer.metaindex_handle();
71 rep->index_block = index_block;
// A cache id is requested only when a block cache is configured; otherwise 0.
72 rep->cache_id = (options.block_cache ? options.block_cache->NewId() : 0);
// The filter fields start out empty; ReadFilter() may fill them in later.
73 rep->filter_data = nullptr;
74 rep->filter = nullptr;
75 *table = new Table(rep);
76 (*table)->ReadMeta(footer);
// Loads the optional filter metadata for this table.
//
// Does nothing when no filter_policy is configured. Otherwise it reads the
// metaindex block named by `footer`, builds the key "filter." followed by the
// policy name, and hands the matching entry's value to ReadFilter().
// A failed metaindex read is deliberately not reported to the caller (see the
// comment at the ReadBlock call).
//
// NOTE(review): the declaration of opt, the call that positions `iter` on
// `key`, and the cleanup of `iter` / `meta` are not visible in this listing --
// confirm against the full file.
82 void Table::ReadMeta(const Footer& footer) {
83 if (rep_->options.filter_policy == nullptr) {
84 return; // Do not need any metadata
87 // TODO(sanjay): Skip this if footer.metaindex_handle() size indicates
88 // it is an empty block.
// Checksums are verified for the metaindex read only under paranoid_checks.
90 if (rep_->options.paranoid_checks) {
91 opt.verify_checksums = true;
93 BlockContents contents;
94 if (!ReadBlock(rep_->file, opt, footer.metaindex_handle(), &contents).ok()) {
95 // Do not propagate errors since meta info is not needed for operation
98 Block* meta = new Block(contents);
// The metaindex block is iterated with the bytewise comparator.
100 Iterator* iter = meta->NewIterator(BytewiseComparator());
101 std::string key = "filter.";
102 key.append(rep_->options.filter_policy->Name());
// Only an exact key match triggers loading of the filter block.
104 if (iter->Valid() && iter->key() == Slice(key)) {
105 ReadFilter(iter->value());
// Loads the filter block described by `filter_handle_value`.
//
// Decodes a BlockHandle from a copy of the value, reads that block from the
// table file, and installs a FilterBlockReader over the block's bytes in
// rep_->filter. When the block reports heap_allocated, a pointer to its bytes
// is also stored in rep_->filter_data.
//
// NOTE(review): the bodies of the two failure checks and the declarations of
// opt and block are not visible in this listing; presumably each failure
// returns early and leaves rep_->filter unset -- confirm.
111 void Table::ReadFilter(const Slice& filter_handle_value) {
// Work on a copy: DecodeFrom() takes a non-const pointer to the slice.
112 Slice v = filter_handle_value;
113 BlockHandle filter_handle;
114 if (!filter_handle.DecodeFrom(&v).ok()) {
118 // We might want to unify with ReadBlock() if we start
119 // requiring checksum verification in Table::Open.
121 if (rep_->options.paranoid_checks) {
122 opt.verify_checksums = true;
125 if (!ReadBlock(rep_->file, opt, filter_handle, &block).ok()) {
128 if (block.heap_allocated) {
129 rep_->filter_data = block.data.data(); // Will need to delete later
131 rep_->filter = new FilterBlockReader(rep_->options.filter_policy, block.data);
// Destroys the table by deleting its Rep.
134 Table::~Table() { delete rep_; }
// Iterator cleanup callback: deletes the Block passed as `arg`. The second
// argument is unused. BlockReader() registers it for blocks that have no
// cache handle.
136 static void DeleteBlock(void* arg, void* ignored) {
137 delete reinterpret_cast<Block*>(arg);
// Callback taking a cache key and value; `value` is interpreted as a Block*.
// NOTE(review): the rest of the body is not visible in this listing --
// presumably it deletes the block when the cache drops the entry; confirm.
140 static void DeleteCachedBlock(const Slice& key, void* value) {
141 Block* block = reinterpret_cast<Block*>(value);
// Iterator cleanup callback: `arg` is the Cache and `h` is the Cache::Handle
// of a cached block; releases that handle on the cache. BlockReader()
// registers it for blocks that have a cache handle.
145 static void ReleaseBlock(void* arg, void* h) {
146 Cache* cache = reinterpret_cast<Cache*>(arg);
147 Cache::Handle* handle = reinterpret_cast<Cache::Handle*>(h);
148 cache->Release(handle);
151 // Convert an index iterator value (i.e., an encoded BlockHandle)
152 // into an iterator over the contents of the corresponding block.
//
// `arg` is the Table* (passed as void* so this function can serve as the
// callback given to NewTwoLevelIterator()), `options` are the read options
// for the block read, and `index_value` is the encoded handle.
//
// With a block cache configured, the block is looked up under a 16-byte key
// and, on a miss, read from the file and possibly inserted into the cache.
// Without a block cache, the block is read from the file directly.
// The result is either an iterator over the block, with a cleanup callback
// registered on it, or an error iterator carrying the failing Status.
//
// NOTE(review): several lines are missing from this listing (the declaration
// of handle and iter, the else branches, the s.ok() checks, the tail of the
// Insert() call, and the return) -- the comments below follow the apparent
// structure; confirm against the full file.
153 Iterator* Table::BlockReader(void* arg, const ReadOptions& options,
154 const Slice& index_value) {
155 Table* table = reinterpret_cast<Table*>(arg);
156 Cache* block_cache = table->rep_->options.block_cache;
157 Block* block = nullptr;
158 Cache::Handle* cache_handle = nullptr;
// Decode from a copy: DecodeFrom() takes a non-const pointer to the slice.
161 Slice input = index_value;
162 Status s = handle.DecodeFrom(&input);
163 // We intentionally allow extra stuff in index_value so that we
164 // can add more features in the future.
167 BlockContents contents;
168 if (block_cache != nullptr) {
// Cache key layout: 8 bytes of the table's cache_id followed by 8 bytes of
// the block's file offset.
169 char cache_key_buffer[16];
170 EncodeFixed64(cache_key_buffer, table->rep_->cache_id);
171 EncodeFixed64(cache_key_buffer + 8, handle.offset());
172 Slice key(cache_key_buffer, sizeof(cache_key_buffer));
173 cache_handle = block_cache->Lookup(key);
174 if (cache_handle != nullptr) {
// Cache hit: reuse the Block stored in the cache.
175 block = reinterpret_cast<Block*>(block_cache->Value(cache_handle));
// Apparently the cache-miss path: read the block from the file.
177 s = ReadBlock(table->rep_->file, options, handle, &contents);
179 block = new Block(contents);
// Insert into the cache only when the contents are cachable and the caller
// asked for the cache to be filled.
180 if (contents.cachable && options.fill_cache) {
181 cache_handle = block_cache->Insert(key, block, block->size(),
// Apparently the no-cache path: read the block from the file.
187 s = ReadBlock(table->rep_->file, options, handle, &contents);
189 block = new Block(contents);
195 if (block != nullptr) {
196 iter = block->NewIterator(table->rep_->options.comparator);
// The cleanup registered on the iterator depends on whether the block has a
// cache handle: DeleteBlock without one, ReleaseBlock with one.
197 if (cache_handle == nullptr) {
198 iter->RegisterCleanup(&DeleteBlock, block, nullptr);
200 iter->RegisterCleanup(&ReleaseBlock, block_cache, cache_handle);
// No block is available: hand back an iterator that reports status s.
203 iter = NewErrorIterator(s);
// Returns a two-level iterator over the table: an iterator over the index
// block is combined with BlockReader(), which is given this table as its
// `arg` together with the caller's read options.
// The const_cast is needed because this method is const while BlockReader()
// receives the table as a non-const void*.
208 Iterator* Table::NewIterator(const ReadOptions& options) const {
209 return NewTwoLevelIterator(
210 rep_->index_block->NewIterator(rep_->options.comparator),
211 &Table::BlockReader, const_cast<Table*>(this), options);
// Looks up `k` and reports an entry through the handle_result callback.
//
// Visible flow: create an iterator over the index block; if it is valid,
// take the encoded block handle it points at. When a filter is present, the
// handle decodes, and the filter says the key cannot match at that block
// offset, the block read is skipped. Otherwise BlockReader() opens the block
// and, if the block iterator is valid, handle_result(arg, key, value) is
// invoked with the entry it points at. The block iterator's status is copied
// into `s`.
//
// NOTE(review): the rest of the signature, the Seek() calls that position
// both iterators, the declaration of handle and s, the body of the filter
// branch, the iterator cleanup, and the return are not visible in this
// listing -- confirm against the full file.
214 Status Table::InternalGet(const ReadOptions& options, const Slice& k, void* arg,
215 void (*handle_result)(void*, const Slice&,
218 Iterator* iiter = rep_->index_block->NewIterator(rep_->options.comparator);
220 if (iiter->Valid()) {
221 Slice handle_value = iiter->value();
222 FilterBlockReader* filter = rep_->filter;
// The filter is consulted with the block's file offset and the lookup key.
224 if (filter != nullptr && handle.DecodeFrom(&handle_value).ok() &&
225 !filter->KeyMayMatch(handle.offset(), k)) {
// Re-read the index value here: DecodeFrom() above was given a pointer to
// handle_value and may have modified it.
228 Iterator* block_iter = BlockReader(this, options, iiter->value());
230 if (block_iter->Valid()) {
231 (*handle_result)(arg, block_iter->key(), block_iter->value());
233 s = block_iter->status();
// Computes an approximate file offset for `key`.
//
// Seeks an index-block iterator to `key`. If the iterator is valid, the
// result is the offset stored in the block handle found there; if that
// handle cannot be decoded, or the key lies past the last index entry, the
// result is the offset of the metaindex block instead.
//
// NOTE(review): the declarations of handle and result, the s.ok() check, the
// else lines, the iterator cleanup, and the return statement are not visible
// in this listing -- confirm against the full file.
244 uint64_t Table::ApproximateOffsetOf(const Slice& key) const {
245 Iterator* index_iter =
246 rep_->index_block->NewIterator(rep_->options.comparator);
247 index_iter->Seek(key);
249 if (index_iter->Valid()) {
// Decode from a copy of the index entry's value.
251 Slice input = index_iter->value();
252 Status s = handle.DecodeFrom(&input);
254 result = handle.offset();
256 // Strange: we can't decode the block handle in the index block.
257 // We'll just return the offset of the metaindex block, which is
258 // close to the whole file size for this case.
259 result = rep_->metaindex_handle.offset();
262 // key is past the last key in the file. Approximate the offset
263 // by returning the offset of the metaindex block (which is
264 // right near the end of the file).
265 result = rep_->metaindex_handle.offset();
271 } // namespace leveldb