/Users/eugenesiegel/btc/bitcoin/src/cuckoocache.h
| Line | Count | Source (jump to first uncovered line) | 
| 1 |  | // Copyright (c) 2016 Jeremy Rubin | 
| 2 |  | // Distributed under the MIT software license, see the accompanying | 
| 3 |  | // file COPYING or http://www.opensource.org/licenses/mit-license.php. | 
| 4 |  |  | 
| 5 |  | #ifndef BITCOIN_CUCKOOCACHE_H | 
| 6 |  | #define BITCOIN_CUCKOOCACHE_H | 
| 7 |  |  | 
| 8 |  | #include <util/fastrange.h> | 
| 9 |  |  | 
| 10 |  | #include <algorithm> | 
| 11 |  | #include <array> | 
| 12 |  | #include <atomic> | 
| 13 |  | #include <cmath> | 
| 14 |  | #include <cstring> | 
| 15 |  | #include <limits> | 
| 16 |  | #include <memory> | 
| 17 |  | #include <utility> | 
| 18 |  | #include <vector> | 
| 19 |  |  | 
| 20 |  |  | 
| 21 |  | /** High-performance cache primitives. | 
| 22 |  |  * | 
| 23 |  |  * Summary: | 
| 24 |  |  * | 
| 25 |  |  * 1. @ref bit_packed_atomic_flags is bit-packed atomic flags for garbage collection | 
| 26 |  |  * | 
| 27 |  |  * 2. @ref cache is a cache which is performant in memory usage and lookup speed. It | 
| 28 |  |  * is lockfree for erase operations. Elements are lazily erased on the next insert. | 
| 29 |  |  */ | 
| 30 |  | namespace CuckooCache | 
| 31 |  | { | 
| 32 |  | /** @ref bit_packed_atomic_flags implements a container for garbage collection flags | 
| 33 |  |  * that is only thread unsafe on calls to setup. This class bit-packs collection | 
| 34 |  |  * flags for memory efficiency. | 
| 35 |  |  * | 
| 36 |  |  * All operations are `std::memory_order_relaxed` so external mechanisms must | 
| 37 |  |  * ensure that writes and reads are properly synchronized. | 
| 38 |  |  * | 
| 39 |  |  * On setup(n), all bits up to `n` are marked as collected. | 
| 40 |  |  * | 
| 41 |  |  * Under the hood, because it is an 8-bit type, it makes sense to use a multiple | 
| 42 |  |  * of 8 for setup, but it will be safe if that is not the case as well. | 
| 43 |  |  */ | 
| 44 |  | class bit_packed_atomic_flags | 
| 45 |  | { | 
| 46 |  |     std::unique_ptr<std::atomic<uint8_t>[]> mem; | 
| 47 |  |  | 
| 48 |  | public: | 
| 49 |  |     /** No default constructor, as there must be some size. */ | 
| 50 |  |     bit_packed_atomic_flags() = delete; | 
| 51 |  |  | 
| 52 |  |     /** | 
| 53 |  |      * bit_packed_atomic_flags constructor creates memory to sufficiently | 
| 54 |  |      * keep track of garbage collection information for `size` entries. | 
| 55 |  |      * | 
| 56 |  |      * @param size the number of elements to allocate space for | 
| 57 |  |      * | 
| 58 |  |      * @post bit_set, bit_unset, and bit_is_set function properly forall x. x < | 
| 59 |  |      * size | 
| 60 |  |      * @post All calls to bit_is_set (without subsequent bit_unset) will return | 
| 61 |  |      * true. | 
| 62 |  |      */ | 
| 63 |  |     explicit bit_packed_atomic_flags(uint32_t size) | 
| 64 | 205k |     { | 
| 65 |  |         // pad out the size if needed | 
| 66 | 205k |         size = (size + 7) / 8; | 
| 67 | 205k |         mem.reset(new std::atomic<uint8_t>[size]); | 
| 68 | 6.72G |         for (uint32_t i = 0; i < size; ++i6.72G) | 
| 69 | 6.72G |             mem[i].store(0xFF); | 
| 70 | 205k |     }; | 
| 71 |  |  | 
| 72 |  |     /** setup marks all entries and ensures that bit_packed_atomic_flags can store | 
| 73 |  |      * at least `b` entries. | 
| 74 |  |      * | 
| 75 |  |      * @param b the number of elements to allocate space for | 
| 76 |  |      * @post bit_set, bit_unset, and bit_is_set function properly forall x. x < | 
| 77 |  |      * b | 
| 78 |  |      * @post All calls to bit_is_set (without subsequent bit_unset) will return | 
| 79 |  |      * true. | 
| 80 |  |      */ | 
| 81 |  |     inline void setup(uint32_t b) | 
| 82 | 102k |     { | 
| 83 | 102k |         bit_packed_atomic_flags d(b); | 
| 84 | 102k |         std::swap(mem, d.mem); | 
| 85 | 102k |     } | 
| 86 |  |  | 
| 87 |  |     /** bit_set sets an entry as discardable. | 
| 88 |  |      * | 
| 89 |  |      * @param s the index of the entry to bit_set | 
| 90 |  |      * @post immediately subsequent call (assuming proper external memory | 
| 91 |  |      * ordering) to bit_is_set(s) == true. | 
| 92 |  |      */ | 
| 93 |  |     inline void bit_set(uint32_t s) | 
| 94 | 95.9k |     { | 
| 95 | 95.9k |         mem[s >> 3].fetch_or(uint8_t(1 << (s & 7)), std::memory_order_relaxed); | 
| 96 | 95.9k |     } | 
| 97 |  |  | 
| 98 |  |     /** bit_unset marks an entry as something that should not be overwritten. | 
| 99 |  |      * | 
| 100 |  |      * @param s the index of the entry to bit_unset | 
| 101 |  |      * @post immediately subsequent call (assuming proper external memory | 
| 102 |  |      * ordering) to bit_is_set(s) == false. | 
| 103 |  |      */ | 
| 104 |  |     inline void bit_unset(uint32_t s) | 
| 105 | 493k |     { | 
| 106 | 493k |         mem[s >> 3].fetch_and(uint8_t(~(1 << (s & 7))), std::memory_order_relaxed); | 
| 107 | 493k |     } | 
| 108 |  |  | 
| 109 |  |     /** bit_is_set queries the table for discardability at `s`. | 
| 110 |  |      * | 
| 111 |  |      * @param s the index of the entry to read | 
| 112 |  |      * @returns true if the bit at index `s` was set, false otherwise | 
| 113 |  |      * */ | 
| 114 |  |     inline bool bit_is_set(uint32_t s) const | 
| 115 | 493k |     { | 
| 116 | 493k |         return (1 << (s & 7)) & mem[s >> 3].load(std::memory_order_relaxed); | 
| 117 | 493k |     } | 
| 118 |  | }; | 
| 119 |  |  | 
| 120 |  | /** @ref cache implements a cache with properties similar to a cuckoo-set. | 
| 121 |  |  * | 
| 122 |  |  *  The cache is able to hold up to `(~(uint32_t)0) - 1` elements. | 
| 123 |  |  * | 
| 124 |  |  *  Read Operations: | 
| 125 |  |  *      - contains() for `erase=false` | 
| 126 |  |  * | 
| 127 |  |  *  Read+Erase Operations: | 
| 128 |  |  *      - contains() for `erase=true` | 
| 129 |  |  * | 
| 130 |  |  *  Erase Operations: | 
| 131 |  |  *      - allow_erase() | 
| 132 |  |  * | 
| 133 |  |  *  Write Operations: | 
| 134 |  |  *      - setup() | 
| 135 |  |  *      - setup_bytes() | 
| 136 |  |  *      - insert() | 
| 137 |  |  *      - please_keep() | 
| 138 |  |  * | 
| 139 |  |  *  Synchronization Free Operations: | 
| 140 |  |  *      - invalid() | 
| 141 |  |  *      - compute_hashes() | 
| 142 |  |  * | 
| 143 |  |  * User Must Guarantee: | 
| 144 |  |  * | 
| 145 |  |  * 1. Write requires synchronized access (e.g. a lock) | 
| 146 |  |  * 2. Read requires no concurrent Write, synchronized with last insert. | 
| 147 |  |  * 3. Erase requires no concurrent Write, synchronized with last insert. | 
| 148 |  |  * 4. An Erase caller must release all memory before allowing a new Writer. | 
| 149 |  |  * | 
| 150 |  |  * | 
| 151 |  |  * Note on function names: | 
| 152 |  |  *   - The name "allow_erase" is used because the real discard happens later. | 
| 153 |  |  *   - The name "please_keep" is used because elements may be erased anyways on insert. | 
| 154 |  |  * | 
| 155 |  |  * @tparam Element should be a movable and copyable type | 
| 156 |  |  * @tparam Hash should be a function/callable which takes a template parameter | 
| 157 |  |  * hash_select and an Element and extracts a hash from it. Should return | 
| 158 |  |  * high-entropy uint32_t hashes for `Hash h; h<0>(e) ... h<7>(e)`. | 
| 159 |  |  */ | 
| 160 |  | template <typename Element, typename Hash> | 
| 161 |  | class cache | 
| 162 |  | { | 
| 163 |  | private: | 
| 164 |  |     /** table stores all the elements */ | 
| 165 |  |     std::vector<Element> table; | 
| 166 |  |  | 
| 167 |  |     /** size stores the total available slots in the hash table */ | 
| 168 |  |     uint32_t size{0}; | 
| 169 |  |  | 
| 170 |  |     /** The bit_packed_atomic_flags array is marked mutable because we want | 
| 171 |  |      * garbage collection to be allowed to occur from const methods */ | 
| 172 |  |     mutable bit_packed_atomic_flags collection_flags; | 
| 173 |  |  | 
| 174 |  |     /** epoch_flags tracks how recently an element was inserted into | 
| 175 |  |      * the cache. true denotes recent, false denotes not-recent. See insert() | 
| 176 |  |      * method for full semantics. | 
| 177 |  |      */ | 
| 178 |  |     mutable std::vector<bool> epoch_flags; | 
| 179 |  |  | 
| 180 |  |     /** epoch_heuristic_counter is used to determine when an epoch might be aged | 
| 181 |  |      * & an expensive scan should be done. epoch_heuristic_counter is | 
| 182 |  |      * decremented on insert and reset to the new number of inserts which would | 
| 183 |  |      * cause the epoch to reach epoch_size when it reaches zero. | 
| 184 |  |      */ | 
| 185 |  |     uint32_t epoch_heuristic_counter{0}; | 
| 186 |  |  | 
| 187 |  |     /** epoch_size is set to be the number of elements supposed to be in a | 
| 188 |  |      * epoch. When the number of non-erased elements in an epoch | 
| 189 |  |      * exceeds epoch_size, a new epoch should be started and all | 
| 190 |  |      * current entries demoted. epoch_size is set to be 45% of size because | 
| 191 |  |      * we want to keep load around 90%, and we support 3 epochs at once -- | 
| 192 |  |      * one "dead" which has been erased, one "dying" which has been marked to be | 
| 193 |  |      * erased next, and one "living" which new inserts add to. | 
| 194 |  |      */ | 
| 195 |  |     uint32_t epoch_size{0}; | 
| 196 |  |  | 
| 197 |  |     /** depth_limit determines how many elements insert should try to replace. | 
| 198 |  |      * Should be set to log2(n). | 
| 199 |  |      */ | 
| 200 |  |     uint8_t depth_limit{0}; | 
| 201 |  |  | 
| 202 |  |     /** hash_function is a const instance of the hash function. It cannot be | 
| 203 |  |      * static or initialized at call time as it may have internal state (such as | 
| 204 |  |      * a nonce). | 
| 205 |  |      */ | 
| 206 |  |     const Hash hash_function; | 
| 207 |  |  | 
| 208 |  |     /** compute_hashes is convenience for not having to write out this | 
| 209 |  |      * expression everywhere we use the hash values of an Element. | 
| 210 |  |      * | 
| 211 |  |      * We need to map the 32-bit input hash onto a hash bucket in a range [0, size) in a | 
| 212 |  |      *  manner which preserves as much of the hash's uniformity as possible. Ideally | 
| 213 |  |      *  this would be done by bitmasking but the size is usually not a power of two. | 
| 214 |  |      * | 
| 215 |  |      * The naive approach would be to use a mod -- which isn't perfectly uniform but so | 
| 216 |  |      *  long as the hash is much larger than size it is not that bad. Unfortunately, | 
| 217 |  |      *  mod/division is fairly slow on ordinary microprocessors (e.g. 90-ish cycles on | 
| 218 |  |      *  haswell, ARM doesn't even have an instruction for it.); when the divisor is a | 
| 219 |  |      *  constant the compiler will do clever tricks to turn it into a multiply+add+shift, | 
| 220 |  |      *  but size is a run-time value so the compiler can't do that here. | 
| 221 |  |      * | 
| 222 |  |      * One option would be to implement the same trick the compiler uses and compute the | 
| 223 |  |      *  constants for exact division based on the size, as described in "{N}-bit Unsigned | 
| 224 |  |      *  Division via {N}-bit Multiply-Add" by Arch D. Robison in 2005. But that code is | 
| 225 |  |      *  somewhat complicated and the result is still slower than an even simpler option: | 
| 226 |  |      *  see the FastRange32 function in util/fastrange.h. | 
| 227 |  |      * | 
| 228 |  |      * The resulting non-uniformity is also more equally distributed which would be | 
| 229 |  |      *  advantageous for something like linear probing, though it shouldn't matter | 
| 230 |  |      *  one way or the other for a cuckoo table. | 
| 231 |  |      * | 
| 232 |  |      * The primary disadvantage of this approach is increased intermediate precision is | 
| 233 |  |      *  required but for a 32-bit random number we only need the high 32 bits of a | 
| 234 |  |      *  32*32->64 multiply, which means the operation is reasonably fast even on a | 
| 235 |  |      *  typical 32-bit processor. | 
| 236 |  |      * | 
| 237 |  |      * @param e The element whose hashes will be returned | 
| 238 |  |      * @returns Deterministic hashes derived from `e` uniformly mapped onto the range [0, size) | 
| 239 |  |      */ | 
| 240 |  |     inline std::array<uint32_t, 8> compute_hashes(const Element& e) const | 
| 241 | 1.58M |     { | 
| 242 | 1.58M |         return {{FastRange32(hash_function.template operator()<0>(e), size), | 
| 243 | 1.58M |                  FastRange32(hash_function.template operator()<1>(e), size), | 
| 244 | 1.58M |                  FastRange32(hash_function.template operator()<2>(e), size), | 
| 245 | 1.58M |                  FastRange32(hash_function.template operator()<3>(e), size), | 
| 246 | 1.58M |                  FastRange32(hash_function.template operator()<4>(e), size), | 
| 247 | 1.58M |                  FastRange32(hash_function.template operator()<5>(e), size), | 
| 248 | 1.58M |                  FastRange32(hash_function.template operator()<6>(e), size), | 
| 249 | 1.58M |                  FastRange32(hash_function.template operator()<7>(e), size)}}; | 
| 250 | 1.58M |     } Unexecuted instantiation: cuckoocache.cpp:_ZNK11CuckooCache5cacheIiN12_GLOBAL__N_112RandomHasherEE14compute_hashesERKi_ZNK11CuckooCache5cacheI7uint25620SignatureCacheHasherE14compute_hashesERKS1_| Line | Count | Source |  | 241 | 1.58M |     { |  | 242 | 1.58M |         return {{FastRange32(hash_function.template operator()<0>(e), size), |  | 243 | 1.58M |                  FastRange32(hash_function.template operator()<1>(e), size), |  | 244 | 1.58M |                  FastRange32(hash_function.template operator()<2>(e), size), |  | 245 | 1.58M |                  FastRange32(hash_function.template operator()<3>(e), size), |  | 246 | 1.58M |                  FastRange32(hash_function.template operator()<4>(e), size), |  | 247 | 1.58M |                  FastRange32(hash_function.template operator()<5>(e), size), |  | 248 | 1.58M |                  FastRange32(hash_function.template operator()<6>(e), size), |  | 249 | 1.58M |                  FastRange32(hash_function.template operator()<7>(e), size)}}; |  | 250 | 1.58M |     } | 
 | 
| 251 |  |  | 
| 252 |  |     /** invalid returns a special index that can never be inserted to | 
| 253 |  |      * @returns the special constexpr index that can never be inserted to */ | 
| 254 |  |     constexpr uint32_t invalid() const | 
| 255 | 493k |     { | 
| 256 | 493k |         return ~(uint32_t)0; | 
| 257 | 493k |     } Unexecuted instantiation: cuckoocache.cpp:_ZNK11CuckooCache5cacheIiN12_GLOBAL__N_112RandomHasherEE7invalidEv_ZNK11CuckooCache5cacheI7uint25620SignatureCacheHasherE7invalidEv| Line | Count | Source |  | 255 | 493k |     { |  | 256 | 493k |         return ~(uint32_t)0; |  | 257 | 493k |     } | 
 | 
| 258 |  |  | 
| 259 |  |     /** allow_erase marks the element at index `n` as discardable. Threadsafe | 
| 260 |  |      * without any concurrent insert. | 
| 261 |  |      * @param n the index to allow erasure of | 
| 262 |  |      */ | 
| 263 |  |     inline void allow_erase(uint32_t n) const | 
| 264 | 95.9k |     { | 
| 265 | 95.9k |         collection_flags.bit_set(n); | 
| 266 | 95.9k |     } Unexecuted instantiation: cuckoocache.cpp:_ZNK11CuckooCache5cacheIiN12_GLOBAL__N_112RandomHasherEE11allow_eraseEj_ZNK11CuckooCache5cacheI7uint25620SignatureCacheHasherE11allow_eraseEj| Line | Count | Source |  | 264 | 95.9k |     { |  | 265 | 95.9k |         collection_flags.bit_set(n); |  | 266 | 95.9k |     } | 
 | 
| 267 |  |  | 
| 268 |  |     /** please_keep marks the element at index `n` as an entry that should be kept. | 
| 269 |  |      * Threadsafe without any concurrent insert. | 
| 270 |  |      * @param n the index to prioritize keeping | 
| 271 |  |      */ | 
| 272 |  |     inline void please_keep(uint32_t n) const | 
| 273 | 493k |     { | 
| 274 | 493k |         collection_flags.bit_unset(n); | 
| 275 | 493k |     } Unexecuted instantiation: cuckoocache.cpp:_ZNK11CuckooCache5cacheIiN12_GLOBAL__N_112RandomHasherEE11please_keepEj_ZNK11CuckooCache5cacheI7uint25620SignatureCacheHasherE11please_keepEj| Line | Count | Source |  | 273 | 493k |     { |  | 274 | 493k |         collection_flags.bit_unset(n); |  | 275 | 493k |     } | 
 | 
| 276 |  |  | 
| 277 |  |     /** epoch_check handles the changing of epochs for elements stored in the | 
| 278 |  |      * cache. epoch_check should be run before every insert. | 
| 279 |  |      * | 
| 280 |  |      * First, epoch_check decrements and checks the cheap heuristic, and then does | 
| 281 |  |      * a more expensive scan if the cheap heuristic runs out. If the expensive | 
| 282 |  |      * scan succeeds, the epochs are aged and old elements are allow_erased. The | 
| 283 |  |      * cheap heuristic is reset to retrigger after the worst case growth of the | 
| 284 |  |      * current epoch's elements would exceed the epoch_size. | 
| 285 |  |      */ | 
| 286 |  |     void epoch_check() | 
| 287 | 493k |     { | 
| 288 | 493k |         if (epoch_heuristic_counter != 0) { | 
| 289 | 493k |             --epoch_heuristic_counter; | 
| 290 | 493k |             return; | 
| 291 | 493k |         } | 
| 292 |  |         // count the number of elements from the latest epoch which | 
| 293 |  |         // have not been erased. | 
| 294 | 0 |         uint32_t epoch_unused_count = 0; | 
| 295 | 0 |         for (uint32_t i = 0; i < size; ++i) | 
| 296 | 0 |             epoch_unused_count += epoch_flags[i] && | 
| 297 | 0 |                                   !collection_flags.bit_is_set(i); | 
| 298 |  |         // If there are more non-deleted entries in the current epoch than the | 
| 299 |  |         // epoch size, then allow_erase on all elements in the old epoch (marked | 
| 300 |  |         // false) and move all elements in the current epoch to the old epoch | 
| 301 |  |         // but do not call allow_erase on their indices. | 
| 302 | 0 |         if (epoch_unused_count >= epoch_size) { | 
| 303 | 0 |             for (uint32_t i = 0; i < size; ++i) | 
| 304 | 0 |                 if (epoch_flags[i]) | 
| 305 | 0 |                     epoch_flags[i] = false; | 
| 306 | 0 |                 else | 
| 307 | 0 |                     allow_erase(i); | 
| 308 | 0 |             epoch_heuristic_counter = epoch_size; | 
| 309 | 0 |         } else | 
| 310 |  |             // reset the epoch_heuristic_counter to next do a scan when worst | 
| 311 |  |             // case behavior (no intermittent erases) would exceed epoch size, | 
| 312 |  |             // with a reasonable minimum scan size. | 
| 313 |  |             // Ordinarily, we would have to sanity check std::min(epoch_size, | 
| 314 |  |             // epoch_unused_count), but we already know that `epoch_unused_count | 
| 315 |  |             // < epoch_size` in this branch | 
| 316 | 0 |             epoch_heuristic_counter = std::max(1u, std::max(epoch_size / 16, | 
| 317 | 0 |                         epoch_size - epoch_unused_count)); | 
| 318 | 0 |     } Unexecuted instantiation: cuckoocache.cpp:_ZN11CuckooCache5cacheIiN12_GLOBAL__N_112RandomHasherEE11epoch_checkEv_ZN11CuckooCache5cacheI7uint25620SignatureCacheHasherE11epoch_checkEv| Line | Count | Source |  | 287 | 493k |     { |  | 288 | 493k |         if (epoch_heuristic_counter != 0) { |  | 289 | 493k |             --epoch_heuristic_counter; |  | 290 | 493k |             return; |  | 291 | 493k |         } |  | 292 |  |         // count the number of elements from the latest epoch which |  | 293 |  |         // have not been erased. |  | 294 | 0 |         uint32_t epoch_unused_count = 0; |  | 295 | 0 |         for (uint32_t i = 0; i < size; ++i) |  | 296 | 0 |             epoch_unused_count += epoch_flags[i] && |  | 297 | 0 |                                   !collection_flags.bit_is_set(i); |  | 298 |  |         // If there are more non-deleted entries in the current epoch than the |  | 299 |  |         // epoch size, then allow_erase on all elements in the old epoch (marked |  | 300 |  |         // false) and move all elements in the current epoch to the old epoch |  | 301 |  |         // but do not call allow_erase on their indices. |  | 302 | 0 |         if (epoch_unused_count >= epoch_size) { |  | 303 | 0 |             for (uint32_t i = 0; i < size; ++i) |  | 304 | 0 |                 if (epoch_flags[i]) |  | 305 | 0 |                     epoch_flags[i] = false; |  | 306 | 0 |                 else |  | 307 | 0 |                     allow_erase(i); |  | 308 | 0 |             epoch_heuristic_counter = epoch_size; |  | 309 | 0 |         } else |  | 310 |  |             // reset the epoch_heuristic_counter to next do a scan when worst |  | 311 |  |             // case behavior (no intermittent erases) would exceed epoch size, |  | 312 |  |             // with a reasonable minimum scan size. 
|  | 313 |  |             // Ordinarily, we would have to sanity check std::min(epoch_size, |  | 314 |  |             // epoch_unused_count), but we already know that `epoch_unused_count |  | 315 |  |             // < epoch_size` in this branch |  | 316 | 0 |             epoch_heuristic_counter = std::max(1u, std::max(epoch_size / 16, |  | 317 | 0 |                         epoch_size - epoch_unused_count)); |  | 318 | 0 |     } | 
 | 
| 319 |  |  | 
| 320 |  | public: | 
| 321 |  |     /** You must always construct a cache with some elements via a subsequent | 
| 322 |  |      * call to setup or setup_bytes, otherwise operations may segfault. | 
| 323 |  |      */ | 
| 324 | 102k |     cache() : table(), collection_flags(0), epoch_flags(), hash_function() | 
| 325 | 102k |     { | 
| 326 | 102k |     } Unexecuted instantiation: cuckoocache.cpp:_ZN11CuckooCache5cacheIiN12_GLOBAL__N_112RandomHasherEEC2Ev_ZN11CuckooCache5cacheI7uint25620SignatureCacheHasherEC2Ev| Line | Count | Source |  | 324 | 102k |     cache() : table(), collection_flags(0), epoch_flags(), hash_function() |  | 325 | 102k |     { |  | 326 | 102k |     } | 
 | 
| 327 |  |  | 
| 328 |  |     /** setup initializes the container to store no more than new_size | 
| 329 |  |      * elements and no less than 2 elements. | 
| 330 |  |      * | 
| 331 |  |      * setup should only be called once. | 
| 332 |  |      * | 
| 333 |  |      * @param new_size the desired number of elements to store | 
| 334 |  |      * @returns the maximum number of elements storable | 
| 335 |  |      */ | 
| 336 |  |     uint32_t setup(uint32_t new_size) | 
| 337 | 102k |     { | 
| 338 |  |         // depth_limit must be at least one otherwise errors can occur. | 
| 339 | 102k |         size = std::max<uint32_t>(2, new_size); | 
| 340 | 102k |         depth_limit = static_cast<uint8_t>(std::log2(static_cast<float>(size))); | 
| 341 | 102k |         table.resize(size); | 
| 342 | 102k |         collection_flags.setup(size); | 
| 343 | 102k |         epoch_flags.resize(size); | 
| 344 |  |         // Set to 45% as described above | 
| 345 | 102k |         epoch_size = std::max(uint32_t{1}, (45 * size) / 100); | 
| 346 |  |         // Initially set to wait for a whole epoch | 
| 347 | 102k |         epoch_heuristic_counter = epoch_size; | 
| 348 | 102k |         return size; | 
| 349 | 102k |     } Unexecuted instantiation: cuckoocache.cpp:_ZN11CuckooCache5cacheIiN12_GLOBAL__N_112RandomHasherEE5setupEj_ZN11CuckooCache5cacheI7uint25620SignatureCacheHasherE5setupEj| Line | Count | Source |  | 337 | 102k |     { |  | 338 |  |         // depth_limit must be at least one otherwise errors can occur. |  | 339 | 102k |         size = std::max<uint32_t>(2, new_size); |  | 340 | 102k |         depth_limit = static_cast<uint8_t>(std::log2(static_cast<float>(size))); |  | 341 | 102k |         table.resize(size); |  | 342 | 102k |         collection_flags.setup(size); |  | 343 | 102k |         epoch_flags.resize(size); |  | 344 |  |         // Set to 45% as described above |  | 345 | 102k |         epoch_size = std::max(uint32_t{1}, (45 * size) / 100); |  | 346 |  |         // Initially set to wait for a whole epoch |  | 347 | 102k |         epoch_heuristic_counter = epoch_size; |  | 348 | 102k |         return size; |  | 349 | 102k |     } | 
 | 
| 350 |  |  | 
| 351 |  |     /** setup_bytes is a convenience function which accounts for internal memory | 
| 352 |  |      * usage when deciding how many elements to store. It isn't perfect because | 
| 353 |  |      * it doesn't account for any overhead (struct size, MallocUsage, collection | 
| 354 |  |      * and epoch flags). This was done to simplify selecting a power of two | 
| 355 |  |      * size. In the expected use case, an extra two bits per entry should be | 
| 356 |  |      * negligible compared to the size of the elements. | 
| 357 |  |      * | 
| 358 |  |      * @param bytes the approximate number of bytes to use for this data | 
| 359 |  |      * structure | 
| 360 |  |      * @returns A pair of the maximum number of elements storable (see setup() | 
| 361 |  |      * documentation for more detail) and the approximate total size of these | 
| 362 |  |      * elements in bytes. | 
| 363 |  |      */ | 
| 364 |  |     std::pair<uint32_t, size_t> setup_bytes(size_t bytes) | 
| 365 | 102k |     { | 
| 366 | 102k |         uint32_t requested_num_elems(std::min<size_t>( | 
| 367 | 102k |             bytes / sizeof(Element), | 
| 368 | 102k |             std::numeric_limits<uint32_t>::max())); | 
| 369 |  |  | 
| 370 | 102k |         auto num_elems = setup(requested_num_elems); | 
| 371 |  |  | 
| 372 | 102k |         size_t approx_size_bytes = num_elems * sizeof(Element); | 
| 373 | 102k |         return std::make_pair(num_elems, approx_size_bytes); | 
| 374 | 102k |     } Unexecuted instantiation: cuckoocache.cpp:_ZN11CuckooCache5cacheIiN12_GLOBAL__N_112RandomHasherEE11setup_bytesEm_ZN11CuckooCache5cacheI7uint25620SignatureCacheHasherE11setup_bytesEm| Line | Count | Source |  | 365 | 102k |     { |  | 366 | 102k |         uint32_t requested_num_elems(std::min<size_t>( |  | 367 | 102k |             bytes / sizeof(Element), |  | 368 | 102k |             std::numeric_limits<uint32_t>::max())); |  | 369 |  |  |  | 370 | 102k |         auto num_elems = setup(requested_num_elems); |  | 371 |  |  |  | 372 | 102k |         size_t approx_size_bytes = num_elems * sizeof(Element); |  | 373 | 102k |         return std::make_pair(num_elems, approx_size_bytes); |  | 374 | 102k |     } | 
 | 
| 375 |  |  | 
| 376 |  |     /** insert loops at most depth_limit times trying to insert a hash | 
| 377 |  |      * at various locations in the table via a variant of the Cuckoo Algorithm | 
| 378 |  |      * with eight hash locations. | 
| 379 |  |      * | 
| 380 |  |      * It drops the last tried element if it runs out of depth before | 
| 381 |  |      * encountering an open slot. | 
| 382 |  |      * | 
| 383 |  |      * Thus: | 
| 384 |  |      * | 
| 385 |  |      * ``` | 
| 386 |  |      * insert(x); | 
| 387 |  |      * return contains(x, false); | 
| 388 |  |      * ``` | 
| 389 |  |      * | 
| 390 |  |      * is not guaranteed to return true. | 
| 391 |  |      * | 
| 392 |  |      * @param e the element to insert | 
| 393 |  |      * @post one of the following: All previously inserted elements and e are | 
| 394 |  |      * now in the table, one previously inserted element is evicted from the | 
| 395 |  |      * table, the entry attempted to be inserted is evicted. | 
| 396 |  |      */ | 
| 397 |  |     inline void insert(Element e) | 
| 398 | 493k |     { | 
| 399 | 493k |         epoch_check(); | 
| 411 | 493k |         for (uint8_t depth = 0; depth < depth_limit; ++depth) {
| 401 | 493k |         bool last_epoch = true; | 
| 402 | 493k |         std::array<uint32_t, 8> locs = compute_hashes(e); | 
| 403 |  |         // Make sure we have not already inserted this element | 
| 404 |  |         // If we have, make sure that it does not get deleted | 
| 405 | 493k |         for (const uint32_t loc : locs) | 
| 406 | 3.94M |             if (table[loc] == e) { | 
| 407 | 0 |                 please_keep(loc); | 
| 408 | 0 |                 epoch_flags[loc] = last_epoch; | 
| 409 | 0 |                 return; | 
| 410 | 0 |             } | 
| 411 | 493k |         for (uint8_t depth = 0; depth < depth_limit; ++depth0) { | 
| 412 |  |             // First try to insert to an empty slot, if one exists | 
| 413 | 493k |             for (const uint32_t loc : locs) { | 
| 414 | 493k |                 if (!collection_flags.bit_is_set(loc)) | 
| 415 | 32 |                     continue; | 
| 416 | 493k |                 table[loc] = std::move(e); | 
| 417 | 493k |                 please_keep(loc); | 
| 418 | 493k |                 epoch_flags[loc] = last_epoch; | 
| 419 | 493k |                 return; | 
| 420 | 493k |             } | 
| 421 |  |             /** Swap with the element at the location that was | 
| 422 |  |             * not the last one looked at. Example: | 
| 423 |  |             * | 
| 424 |  |             * 1. On first iteration, last_loc == invalid(), find returns last, so | 
| 425 |  |             *    last_loc defaults to locs[0]. | 
| 426 |  |             * 2. On further iterations, where last_loc == locs[k], last_loc will | 
| 427 |  |             *    go to locs[k+1 % 8], i.e., next of the 8 indices wrapping around | 
| 428 |  |             *    to 0 if needed. | 
| 429 |  |             * | 
| 430 |  |             * This prevents moving the element we just put in. | 
| 431 |  |             * | 
| 432 |  |             * The swap is not a move -- we must switch onto the evicted element | 
| 433 |  |             * for the next iteration. | 
| 434 |  |             */ | 
| 435 | 0 |             last_loc = locs[(1 + (std::find(locs.begin(), locs.end(), last_loc) - locs.begin())) & 7]; | 
| 436 | 0 |             std::swap(table[last_loc], e); | 
| 437 |  |             // Can't std::swap a std::vector<bool>::reference and a bool&. | 
| 438 | 0 |             bool epoch = last_epoch; | 
| 439 | 0 |             last_epoch = epoch_flags[last_loc]; | 
| 440 | 0 |             epoch_flags[last_loc] = epoch; | 
| 441 |  |  | 
| 442 |  |             // Recompute the locs -- unfortunately happens one too many times! | 
| 443 | 0 |             locs = compute_hashes(e); | 
| 444 | 0 |         } | 
| 445 | 493k |     } Unexecuted instantiation: cuckoocache.cpp:_ZN11CuckooCache5cacheIiN12_GLOBAL__N_112RandomHasherEE6insertEi_ZN11CuckooCache5cacheI7uint25620SignatureCacheHasherE6insertES1_| Line | Count | Source |  | 398 | 493k |     { |  | 399 | 493k |         epoch_check(); |  | 400 | 493k |         uint32_t last_loc = invalid(); |  | 401 | 493k |         bool last_epoch = true; |  | 402 | 493k |         std::array<uint32_t, 8> locs = compute_hashes(e); |  | 403 |  |         // Make sure we have not already inserted this element |  | 404 |  |         // If we have, make sure that it does not get deleted |  | 405 | 493k |         for (const uint32_t loc : locs) |  | 406 | 3.94M |             if (table[loc] == e) { |  | 407 | 0 |                 please_keep(loc); |  | 408 | 0 |                 epoch_flags[loc] = last_epoch; |  | 409 | 0 |                 return; |  | 410 | 0 |             } |  | 411 | 493k |         for (uint8_t depth = 0; depth < depth_limit; ++depth0) { |  | 412 |  |             // First try to insert to an empty slot, if one exists |  | 413 | 493k |             for (const uint32_t loc : locs) { |  | 414 | 493k |                 if (!collection_flags.bit_is_set(loc)) |  | 415 | 32 |                     continue; |  | 416 | 493k |                 table[loc] = std::move(e); |  | 417 | 493k |                 please_keep(loc); |  | 418 | 493k |                 epoch_flags[loc] = last_epoch; |  | 419 | 493k |                 return; |  | 420 | 493k |             } |  | 421 |  |             /** Swap with the element at the location that was |  | 422 |  |             * not the last one looked at. Example: |  | 423 |  |             * |  | 424 |  |             * 1. On first iteration, last_loc == invalid(), find returns last, so |  | 425 |  |             *    last_loc defaults to locs[0]. |  | 426 |  |             * 2. 
On further iterations, where last_loc == locs[k], last_loc will |  | 427 |  |             *    go to locs[k+1 % 8], i.e., next of the 8 indices wrapping around |  | 428 |  |             *    to 0 if needed. |  | 429 |  |             * |  | 430 |  |             * This prevents moving the element we just put in. |  | 431 |  |             * |  | 432 |  |             * The swap is not a move -- we must switch onto the evicted element |  | 433 |  |             * for the next iteration. |  | 434 |  |             */ |  | 435 | 0 |             last_loc = locs[(1 + (std::find(locs.begin(), locs.end(), last_loc) - locs.begin())) & 7]; |  | 436 | 0 |             std::swap(table[last_loc], e); |  | 437 |  |             // Can't std::swap a std::vector<bool>::reference and a bool&. |  | 438 | 0 |             bool epoch = last_epoch; |  | 439 | 0 |             last_epoch = epoch_flags[last_loc]; |  | 440 | 0 |             epoch_flags[last_loc] = epoch; |  | 441 |  |  |  | 442 |  |             // Recompute the locs -- unfortunately happens one too many times! |  | 443 | 0 |             locs = compute_hashes(e); |  | 444 | 0 |         } |  | 445 | 493k |     } | 
 | 
| 446 |  |  | 
| 447 |  |     /** contains iterates through the hash locations for a given element | 
| 448 |  |      * and checks to see if it is present. | 
| 449 |  |      * | 
| 450 |  |      * contains does not check garbage collected state (in other words, | 
| 451 |  |      * garbage is only collected when the space is needed), so: | 
| 452 |  |      * | 
| 453 |  |      * ``` | 
| 454 |  |      * insert(x); | 
| 455 |  |      * if (contains(x, true)) | 
| 456 |  |      *     return contains(x, false); | 
| 457 |  |      * else | 
| 458 |  |      *     return true; | 
| 459 |  |      * ``` | 
| 460 |  |      * | 
| 461 |  |      * executed on a single thread will always return true! | 
| 462 |  |      * | 
| 463 |  |      * This is a great property for re-org performance for example. | 
| 464 |  |      * | 
| 465 |  |      * contains returns a bool set true if the element was found. | 
| 466 |  |      * | 
| 467 |  |      * @param e the element to check | 
| 468 |  |      * @param erase whether to attempt setting the garbage collect flag | 
| 469 |  |      * | 
| 470 |  |      * @post if erase is true and the element is found, then the garbage collect | 
| 471 |  |      * flag is set | 
| 472 |  |      * @returns true if the element is found, false otherwise | 
| 473 |  |      */ | 
| 474 |  |     inline bool contains(const Element& e, const bool erase) const | 
| 475 | 1.09M |     { | 
| 476 | 1.09M |         std::array<uint32_t, 8> locs = compute_hashes(e); | 
| 477 | 1.09M |         for (const uint32_t loc : locs) | 
| 478 | 8.08M |             if (table[loc] == e) { | 
| 479 | 96.0k |                 if (erase) | 
| 480 | 95.9k |                     allow_erase(loc); | 
| 481 | 96.0k |                 return true; | 
| 482 | 96.0k |             } | 
| 483 | 998k |         return false; | 
| 484 | 1.09M |     } Unexecuted instantiation: cuckoocache.cpp:_ZNK11CuckooCache5cacheIiN12_GLOBAL__N_112RandomHasherEE8containsERKib_ZNK11CuckooCache5cacheI7uint25620SignatureCacheHasherE8containsERKS1_b| Line | Count | Source |  | 475 | 1.09M |     { |  | 476 | 1.09M |         std::array<uint32_t, 8> locs = compute_hashes(e); |  | 477 | 1.09M |         for (const uint32_t loc : locs) |  | 478 | 8.08M |             if (table[loc] == e) { |  | 479 | 96.0k |                 if (erase) |  | 480 | 95.9k |                     allow_erase(loc); |  | 481 | 96.0k |                 return true; |  | 482 | 96.0k |             } |  | 483 | 998k |         return false; |  | 484 | 1.09M |     } | 
 | 
| 485 |  | }; | 
| 486 |  | } // namespace CuckooCache | 
| 487 |  |  | 
| 488 |  | #endif // BITCOIN_CUCKOOCACHE_H |