/Users/eugenesiegel/btc/bitcoin/src/wallet/migrate.cpp
| Line | Count | Source (jump to first uncovered line) | 
| 1 |  | // Copyright (c) 2024-present The Bitcoin Core developers | 
| 2 |  | // Distributed under the MIT software license, see the accompanying | 
| 3 |  | // file COPYING or http://www.opensource.org/licenses/mit-license.php. | 
| 4 |  |  | 
| 5 |  | #include <compat/byteswap.h> | 
| 6 |  | #include <crypto/common.h> | 
| 7 |  | #include <logging.h> | 
| 8 |  | #include <streams.h> | 
| 9 |  | #include <util/translation.h> | 
| 10 |  | #include <wallet/migrate.h> | 
| 11 |  |  | 
| 12 |  | #include <array> | 
| 13 |  | #include <cstddef> | 
| 14 |  | #include <optional> | 
| 15 |  | #include <stdexcept> | 
| 16 |  | #include <variant> | 
| 17 |  | #include <vector> | 
| 18 |  |  | 
| 19 |  | namespace wallet { | 
| 20 |  | // Magic bytes in both endianness's | 
| 21 |  | constexpr uint32_t BTREE_MAGIC = 0x00053162;    // If the file endianness matches our system, we see this magic | 
| 22 |  | constexpr uint32_t BTREE_MAGIC_OE = 0x62310500; // If the file endianness is the other one, we will see this magic | 
| 23 |  |  | 
| 24 |  | // Subdatabase name | 
| 25 |  | static const std::vector<std::byte> SUBDATABASE_NAME = {std::byte{'m'}, std::byte{'a'}, std::byte{'i'}, std::byte{'n'}}; | 
| 26 |  |  | 
| 27 |  | enum class PageType : uint8_t { | 
| 28 |  |     /* | 
| 29 |  |      * BDB has several page types, most of which we do not use | 
| 30 |  |      * They are listed here for completeness, but commented out | 
| 31 |  |      * to avoid opening something unintended. | 
| 32 |  |     INVALID = 0,         // Invalid page type | 
| 33 |  |     DUPLICATE = 1,       // Duplicate. Deprecated and no longer used | 
| 34 |  |     HASH_UNSORTED = 2,   // Hash pages. Deprecated. | 
| 35 |  |     RECNO_INTERNAL = 4,  // Recno internal | 
| 36 |  |     RECNO_LEAF = 6,      // Recno leaf | 
| 37 |  |     HASH_META = 8,       // Hash metadata | 
| 38 |  |     QUEUE_META = 10,     // Queue Metadata | 
| 39 |  |     QUEUE_DATA = 11,     // Queue Data | 
| 40 |  |     DUPLICATE_LEAF = 12, // Off-page duplicate leaf | 
| 41 |  |     HASH_SORTED = 13,    // Sorted hash page | 
| 42 |  |     */ | 
| 43 |  |     BTREE_INTERNAL = 3, // BTree internal | 
| 44 |  |     BTREE_LEAF = 5,     // BTree leaf | 
| 45 |  |     OVERFLOW_DATA = 7,  // Overflow | 
| 46 |  |     BTREE_META = 9,     // BTree metadata | 
| 47 |  | }; | 
| 48 |  |  | 
| 49 |  | enum class RecordType : uint8_t { | 
| 50 |  |     KEYDATA = 1, | 
| 51 |  |     // DUPLICATE = 2,       Unused as our databases do not support duplicate records | 
| 52 |  |     OVERFLOW_DATA = 3, | 
| 53 |  |     DELETE = 0x80, // Indicate this record is deleted. This is OR'd with the real type. | 
| 54 |  | }; | 
| 55 |  |  | 
| 56 |  | enum class BTreeFlags : uint32_t { | 
| 57 |  |     /* | 
| 58 |  |      * BTree databases have feature flags, but we do not use them except for | 
| 59 |  |      * subdatabases. The unused flags are included for completeness, but commented out | 
| 60 |  |      * to avoid accidental use. | 
| 61 |  |     DUP = 1,         // Duplicates | 
| 62 |  |     RECNO = 2,       // Recno tree | 
| 63 |  |     RECNUM = 4,      // BTree: Maintain record counts | 
| 64 |  |     FIXEDLEN = 8,    // Recno: fixed length records | 
| 65 |  |     RENUMBER = 0x10, // Recno: renumber on insert/delete | 
| 66 |  |     DUPSORT = 0x40,  // Duplicates are sorted | 
| 67 |  |     COMPRESS = 0x80, // Compressed | 
| 68 |  |     */ | 
| 69 |  |     SUBDB = 0x20, // Subdatabases | 
| 70 |  | }; | 
| 71 |  |  | 
| 72 |  | /** Berkeley DB BTree metadata page layout */ | 
| 73 |  | class MetaPage | 
| 74 |  | { | 
| 75 |  | public: | 
| 76 |  |     uint32_t lsn_file;             // Log Sequence Number file | 
| 77 |  |     uint32_t lsn_offset;           // Log Sequence Number offset | 
| 78 |  |     uint32_t page_num;             // Current page number | 
| 79 |  |     uint32_t magic;                // Magic number | 
| 80 |  |     uint32_t version;              // Version | 
| 81 |  |     uint32_t pagesize;             // Page size | 
| 82 |  |     uint8_t encrypt_algo;          // Encryption algorithm | 
| 83 |  |     PageType type;                 // Page type | 
| 84 |  |     uint8_t metaflags;             // Meta-only flags | 
| 85 |  |     uint8_t unused1;               // Unused | 
| 86 |  |     uint32_t free_list;            // Free list page number | 
| 87 |  |     uint32_t last_page;            // Page number of last page in db | 
| 88 |  |     uint32_t partitions;           // Number of partitions | 
| 89 |  |     uint32_t key_count;            // Cached key count | 
| 90 |  |     uint32_t record_count;         // Cached record count | 
| 91 |  |     BTreeFlags flags;              // Flags | 
| 92 |  |     std::array<std::byte, 20> uid; // 20 byte unique file ID | 
| 93 |  |     uint32_t unused2;              // Unused | 
| 94 |  |     uint32_t minkey;               // Minimum key | 
| 95 |  |     uint32_t re_len;               // Recno: fixed length record length | 
| 96 |  |     uint32_t re_pad;               // Recno: fixed length record pad | 
| 97 |  |     uint32_t root;                 // Root page number | 
| 98 |  |     char unused3[368];             // 92 * 4 bytes of unused space | 
| 99 |  |     uint32_t crypto_magic;         // Crypto magic number | 
| 100 |  |     char trash[12];                // 3 * 4 bytes of trash space | 
| 101 |  |     unsigned char iv[20];          // Crypto IV | 
| 102 |  |     unsigned char chksum[16];      // Checksum | 
| 103 |  |  | 
| 104 |  |     bool other_endian; | 
| 105 |  |     uint32_t expected_page_num; | 
| 106 |  |  | 
| 107 | 0 |     MetaPage(uint32_t expected_page_num) : expected_page_num(expected_page_num) {} | 
| 108 |  |     MetaPage() = delete; | 
| 109 |  |  | 
| 110 |  |     template <typename Stream> | 
| 111 |  |     void Unserialize(Stream& s) | 
| 112 | 0 |     { | 
| 113 | 0 |         s >> lsn_file; | 
| 114 | 0 |         s >> lsn_offset; | 
| 115 | 0 |         s >> page_num; | 
| 116 | 0 |         s >> magic; | 
| 117 | 0 |         s >> version; | 
| 118 | 0 |         s >> pagesize; | 
| 119 | 0 |         s >> encrypt_algo; | 
| 120 |  | 
 | 
| 121 | 0 |         other_endian = magic == BTREE_MAGIC_OE; | 
| 122 |  | 
 | 
| 123 | 0 |         uint8_t uint8_type; | 
| 124 | 0 |         s >> uint8_type; | 
| 125 | 0 |         type = static_cast<PageType>(uint8_type); | 
| 126 |  | 
 | 
| 127 | 0 |         s >> metaflags; | 
| 128 | 0 |         s >> unused1; | 
| 129 | 0 |         s >> free_list; | 
| 130 | 0 |         s >> last_page; | 
| 131 | 0 |         s >> partitions; | 
| 132 | 0 |         s >> key_count; | 
| 133 | 0 |         s >> record_count; | 
| 134 |  | 
 | 
| 135 | 0 |         uint32_t uint32_flags; | 
| 136 | 0 |         s >> uint32_flags; | 
| 137 | 0 |         if (other_endian) { | 
| 138 | 0 |             uint32_flags = internal_bswap_32(uint32_flags); | 
| 139 | 0 |         } | 
| 140 | 0 |         flags = static_cast<BTreeFlags>(uint32_flags); | 
| 141 |  | 
 | 
| 142 | 0 |         s >> uid; | 
| 143 | 0 |         s >> unused2; | 
| 144 | 0 |         s >> minkey; | 
| 145 | 0 |         s >> re_len; | 
| 146 | 0 |         s >> re_pad; | 
| 147 | 0 |         s >> root; | 
| 148 | 0 |         s >> unused3; | 
| 149 | 0 |         s >> crypto_magic; | 
| 150 | 0 |         s >> trash; | 
| 151 | 0 |         s >> iv; | 
| 152 | 0 |         s >> chksum; | 
| 153 |  | 
 | 
| 154 | 0 |         if (other_endian) { | 
| 155 | 0 |             lsn_file = internal_bswap_32(lsn_file); | 
| 156 | 0 |             lsn_offset = internal_bswap_32(lsn_offset); | 
| 157 | 0 |             page_num = internal_bswap_32(page_num); | 
| 158 | 0 |             magic = internal_bswap_32(magic); | 
| 159 | 0 |             version = internal_bswap_32(version); | 
| 160 | 0 |             pagesize = internal_bswap_32(pagesize); | 
| 161 | 0 |             free_list = internal_bswap_32(free_list); | 
| 162 | 0 |             last_page = internal_bswap_32(last_page); | 
| 163 | 0 |             partitions = internal_bswap_32(partitions); | 
| 164 | 0 |             key_count = internal_bswap_32(key_count); | 
| 165 | 0 |             record_count = internal_bswap_32(record_count); | 
| 166 | 0 |             unused2 = internal_bswap_32(unused2); | 
| 167 | 0 |             minkey = internal_bswap_32(minkey); | 
| 168 | 0 |             re_len = internal_bswap_32(re_len); | 
| 169 | 0 |             re_pad = internal_bswap_32(re_pad); | 
| 170 | 0 |             root = internal_bswap_32(root); | 
| 171 | 0 |             crypto_magic = internal_bswap_32(crypto_magic); | 
| 172 | 0 |         } | 
| 173 |  |  | 
| 174 |  |         // Page number must match | 
| 175 | 0 |         if (page_num != expected_page_num) { | 
| 176 | 0 |             throw std::runtime_error("Meta page number mismatch"); | 
| 177 | 0 |         } | 
| 178 |  |  | 
| 179 |  |         // Check magic | 
| 180 | 0 |         if (magic != BTREE_MAGIC) { | 
| 181 | 0 |             throw std::runtime_error("Not a BDB file"); | 
| 182 | 0 |         } | 
| 183 |  |  | 
| 184 |  |         // Only version 9 is supported | 
| 185 | 0 |         if (version != 9) { | 
| 186 | 0 |             throw std::runtime_error("Unsupported BDB data file version number"); | 
| 187 | 0 |         } | 
| 188 |  |  | 
| 189 |  |         // Page size must be 512 <= pagesize <= 64k, and be a power of 2 | 
| 190 | 0 |         if (pagesize < 512 || pagesize > 65536 || (pagesize & (pagesize - 1)) != 0) { | 
| 191 | 0 |             throw std::runtime_error("Bad page size"); | 
| 192 | 0 |         } | 
| 193 |  |  | 
| 194 |  |         // Page type must be the btree type | 
| 195 | 0 |         if (type != PageType::BTREE_META) { | 
| 196 | 0 |             throw std::runtime_error("Unexpected page type, should be 9 (BTree Metadata)"); | 
| 197 | 0 |         } | 
| 198 |  |  | 
| 199 |  |         // Only supported meta-flag is subdatabase | 
| 200 | 0 |         if (flags != BTreeFlags::SUBDB) { | 
| 201 | 0 |             throw std::runtime_error("Unexpected database flags, should only be 0x20 (subdatabases)"); | 
| 202 | 0 |         } | 
| 203 | 0 |     } | 
| 204 |  | }; | 
| 205 |  |  | 
| 206 |  | /** General class for records in a BDB BTree database. Contains common fields. */ | 
| 207 |  | class RecordHeader | 
| 208 |  | { | 
| 209 |  | public: | 
| 210 |  |     uint16_t len;    // Key/data item length | 
| 211 |  |     RecordType type; // Page type (BDB has this include a DELETE FLAG that we track separately) | 
| 212 |  |     bool deleted;    // Whether the DELETE flag was set on type | 
| 213 |  |  | 
| 214 |  |     static constexpr size_t SIZE = 3; // The record header is 3 bytes | 
| 215 |  |  | 
| 216 |  |     bool other_endian; | 
| 217 |  |  | 
| 218 | 0 |     RecordHeader(bool other_endian) : other_endian(other_endian) {} | 
| 219 |  |     RecordHeader() = delete; | 
| 220 |  |  | 
| 221 |  |     template <typename Stream> | 
| 222 |  |     void Unserialize(Stream& s) | 
| 223 | 0 |     { | 
| 224 | 0 |         s >> len; | 
| 225 |  | 
 | 
| 226 | 0 |         uint8_t uint8_type; | 
| 227 | 0 |         s >> uint8_type; | 
| 228 | 0 |         type = static_cast<RecordType>(uint8_type & ~static_cast<uint8_t>(RecordType::DELETE)); | 
| 229 | 0 |         deleted = uint8_type & static_cast<uint8_t>(RecordType::DELETE); | 
| 230 |  | 
 | 
| 231 | 0 |         if (other_endian) { | 
| 232 | 0 |             len = internal_bswap_16(len); | 
| 233 | 0 |         } | 
| 234 | 0 |     } | 
| 235 |  | }; | 
| 236 |  |  | 
| 237 |  | /** Class for data in the record directly */ | 
| 238 |  | class DataRecord | 
| 239 |  | { | 
| 240 |  | public: | 
| 241 | 0 |     DataRecord(const RecordHeader& header) : m_header(header) {} | 
| 242 |  |     DataRecord() = delete; | 
| 243 |  |  | 
| 244 |  |     RecordHeader m_header; | 
| 245 |  |  | 
| 246 |  |     std::vector<std::byte> data; // Variable length key/data item | 
| 247 |  |  | 
| 248 |  |     template <typename Stream> | 
| 249 |  |     void Unserialize(Stream& s) | 
| 250 | 0 |     { | 
| 251 | 0 |         data.resize(m_header.len); | 
| 252 | 0 |         s.read(std::as_writable_bytes(std::span(data.data(), data.size()))); | 
| 253 | 0 |     } | 
| 254 |  | }; | 
| 255 |  |  | 
| 256 |  | /** Class for records representing internal nodes of the BTree. */ | 
| 257 |  | class InternalRecord | 
| 258 |  | { | 
| 259 |  | public: | 
| 260 | 0 |     InternalRecord(const RecordHeader& header) : m_header(header) {} | 
| 261 |  |     InternalRecord() = delete; | 
| 262 |  |  | 
| 263 |  |     RecordHeader m_header; | 
| 264 |  |  | 
| 265 |  |     uint8_t unused;              // Padding, unused | 
| 266 |  |     uint32_t page_num;           // Page number of referenced page | 
| 267 |  |     uint32_t records;            // Subtree record count | 
| 268 |  |     std::vector<std::byte> data; // Variable length key item | 
| 269 |  |  | 
| 270 |  |     static constexpr size_t FIXED_SIZE = 9; // Size of fixed data is 9 bytes | 
| 271 |  |  | 
| 272 |  |     template <typename Stream> | 
| 273 |  |     void Unserialize(Stream& s) | 
| 274 | 0 |     { | 
| 275 | 0 |         s >> unused; | 
| 276 | 0 |         s >> page_num; | 
| 277 | 0 |         s >> records; | 
| 278 |  | 
 | 
| 279 | 0 |         data.resize(m_header.len); | 
| 280 | 0 |         s.read(std::as_writable_bytes(std::span(data.data(), data.size()))); | 
| 281 |  | 
 | 
| 282 | 0 |         if (m_header.other_endian) { | 
| 283 | 0 |             page_num = internal_bswap_32(page_num); | 
| 284 | 0 |             records = internal_bswap_32(records); | 
| 285 | 0 |         } | 
| 286 | 0 |     } | 
| 287 |  | }; | 
| 288 |  |  | 
| 289 |  | /** Class for records representing overflow records of the BTree. | 
| 290 |  |  * Overflow records point to a page which contains the data in the record. | 
| 291 |  |  * Those pages may point to further pages with the rest of the data if it does not fit | 
| 292 |  |  * in one page */ | 
| 293 |  | class OverflowRecord | 
| 294 |  | { | 
| 295 |  | public: | 
| 296 | 0 |     OverflowRecord(const RecordHeader& header) : m_header(header) {} | 
| 297 |  |     OverflowRecord() = delete; | 
| 298 |  |  | 
| 299 |  |     RecordHeader m_header; | 
| 300 |  |  | 
| 301 |  |     uint8_t unused2;      // Padding, unused | 
| 302 |  |     uint32_t page_number; // Page number where data begins | 
| 303 |  |     uint32_t item_len;    // Total length of item | 
| 304 |  |  | 
| 305 |  |     static constexpr size_t SIZE = 9; // Overflow record is always 9 bytes | 
| 306 |  |  | 
| 307 |  |     template <typename Stream> | 
| 308 |  |     void Unserialize(Stream& s) | 
| 309 | 0 |     { | 
| 310 | 0 |         s >> unused2; | 
| 311 | 0 |         s >> page_number; | 
| 312 | 0 |         s >> item_len; | 
| 313 |  | 
 | 
| 314 | 0 |         if (m_header.other_endian) { | 
| 315 | 0 |             page_number = internal_bswap_32(page_number); | 
| 316 | 0 |             item_len = internal_bswap_32(item_len); | 
| 317 | 0 |         } | 
| 318 | 0 |     } | 
| 319 |  | }; | 
| 320 |  |  | 
| 321 |  | /** A generic data page in the database. Contains fields common to all data pages. */ | 
| 322 |  | class PageHeader | 
| 323 |  | { | 
| 324 |  | public: | 
| 325 |  |     uint32_t lsn_file;   // Log Sequence Number file | 
| 326 |  |     uint32_t lsn_offset; // Log Sequence Number offset | 
| 327 |  |     uint32_t page_num;   // Current page number | 
| 328 |  |     uint32_t prev_page;  // Previous page number | 
| 329 |  |     uint32_t next_page;  // Next page number | 
| 330 |  |     uint16_t entries;    // Number of items on the page | 
| 331 |  |     uint16_t hf_offset;  // High free byte page offset | 
| 332 |  |     uint8_t level;       // Btree page level | 
| 333 |  |     PageType type;       // Page type | 
| 334 |  |  | 
| 335 |  |     static constexpr int64_t SIZE = 26; // The header is 26 bytes | 
| 336 |  |  | 
| 337 |  |     uint32_t expected_page_num; | 
| 338 |  |     bool other_endian; | 
| 339 |  |  | 
| 340 | 0 |     PageHeader(uint32_t page_num, bool other_endian) : expected_page_num(page_num), other_endian(other_endian) {} | 
| 341 |  |     PageHeader() = delete; | 
| 342 |  |  | 
| 343 |  |     template <typename Stream> | 
| 344 |  |     void Unserialize(Stream& s) | 
| 345 | 0 |     { | 
| 346 | 0 |         s >> lsn_file; | 
| 347 | 0 |         s >> lsn_offset; | 
| 348 | 0 |         s >> page_num; | 
| 349 | 0 |         s >> prev_page; | 
| 350 | 0 |         s >> next_page; | 
| 351 | 0 |         s >> entries; | 
| 352 | 0 |         s >> hf_offset; | 
| 353 | 0 |         s >> level; | 
| 354 |  | 
 | 
| 355 | 0 |         uint8_t uint8_type; | 
| 356 | 0 |         s >> uint8_type; | 
| 357 | 0 |         type = static_cast<PageType>(uint8_type); | 
| 358 |  | 
 | 
| 359 | 0 |         if (other_endian) { | 
| 360 | 0 |             lsn_file = internal_bswap_32(lsn_file); | 
| 361 | 0 |             lsn_offset = internal_bswap_32(lsn_offset); | 
| 362 | 0 |             page_num = internal_bswap_32(page_num); | 
| 363 | 0 |             prev_page = internal_bswap_32(prev_page); | 
| 364 | 0 |             next_page = internal_bswap_32(next_page); | 
| 365 | 0 |             entries = internal_bswap_16(entries); | 
| 366 | 0 |             hf_offset = internal_bswap_16(hf_offset); | 
| 367 | 0 |         } | 
| 368 |  | 
 | 
| 369 | 0 |         if (expected_page_num != page_num) { | 
| 370 | 0 |             throw std::runtime_error("Page number mismatch"); | 
| 371 | 0 |         } | 
| 372 | 0 |         if ((type != PageType::OVERFLOW_DATA && level < 1) || (type == PageType::OVERFLOW_DATA && level != 0)) { | 
| 373 | 0 |             throw std::runtime_error("Bad btree level"); | 
| 374 | 0 |         } | 
| 375 | 0 |     } | 
| 376 |  | }; | 
| 377 |  |  | 
| 378 |  | /** A page of records in the database */ | 
| 379 |  | class RecordsPage | 
| 380 |  | { | 
| 381 |  | public: | 
| 382 | 0 |     RecordsPage(const PageHeader& header) : m_header(header) {} | 
| 383 |  |     RecordsPage() = delete; | 
| 384 |  |  | 
| 385 |  |     PageHeader m_header; | 
| 386 |  |  | 
| 387 |  |     std::vector<uint16_t> indexes; | 
| 388 |  |     std::vector<std::variant<DataRecord, OverflowRecord>> records; | 
| 389 |  |  | 
| 390 |  |     template <typename Stream> | 
| 391 |  |     void Unserialize(Stream& s) | 
| 392 | 0 |     { | 
| 393 |  |         // Current position within the page | 
| 394 | 0 |         int64_t pos = PageHeader::SIZE; | 
| 395 |  |  | 
| 396 |  |         // Get the items | 
| 397 | 0 |         for (uint32_t i = 0; i < m_header.entries; ++i) { | 
| 398 |  |             // Get the index | 
| 399 | 0 |             uint16_t index; | 
| 400 | 0 |             s >> index; | 
| 401 | 0 |             if (m_header.other_endian) { | 
| 402 | 0 |                 index = internal_bswap_16(index); | 
| 403 | 0 |             } | 
| 404 | 0 |             indexes.push_back(index); | 
| 405 | 0 |             pos += sizeof(uint16_t); | 
| 406 |  |  | 
| 407 |  |             // Go to the offset from the index | 
| 408 | 0 |             int64_t to_jump = index - pos; | 
| 409 | 0 |             if (to_jump < 0) { | 
| 410 | 0 |                 throw std::runtime_error("Data record position not in page"); | 
| 411 | 0 |             } | 
| 412 | 0 |             s.ignore(to_jump); | 
| 413 |  |  | 
| 414 |  |             // Read the record | 
| 415 | 0 |             RecordHeader rec_hdr(m_header.other_endian); | 
| 416 | 0 |             s >> rec_hdr; | 
| 417 | 0 |             to_jump += RecordHeader::SIZE; | 
| 418 |  | 
 | 
| 419 | 0 |             switch (rec_hdr.type) { | 
| 420 | 0 |             case RecordType::KEYDATA: { | 
| 421 | 0 |                 DataRecord record(rec_hdr); | 
| 422 | 0 |                 s >> record; | 
| 423 | 0 |                 records.emplace_back(record); | 
| 424 | 0 |                 to_jump += rec_hdr.len; | 
| 425 | 0 |                 break; | 
| 426 | 0 |             } | 
| 427 | 0 |             case RecordType::OVERFLOW_DATA: { | 
| 428 | 0 |                 OverflowRecord record(rec_hdr); | 
| 429 | 0 |                 s >> record; | 
| 430 | 0 |                 records.emplace_back(record); | 
| 431 | 0 |                 to_jump += OverflowRecord::SIZE; | 
| 432 | 0 |                 break; | 
| 433 | 0 |             } | 
| 434 | 0 |             default: | 
| 435 | 0 |                 throw std::runtime_error("Unknown record type in records page"); | 
| 436 | 0 |             } | 
| 437 |  |  | 
| 438 |  |             // Go back to the indexes | 
| 439 | 0 |             s.seek(-to_jump, SEEK_CUR); | 
| 440 | 0 |         } | 
| 441 | 0 |     } | 
| 442 |  | }; | 
| 443 |  |  | 
| 444 |  | /** A page containing overflow data */ | 
| 445 |  | class OverflowPage | 
| 446 |  | { | 
| 447 |  | public: | 
| 448 | 0 |     OverflowPage(const PageHeader& header) : m_header(header) {} | 
| 449 |  |     OverflowPage() = delete; | 
| 450 |  |  | 
| 451 |  |     PageHeader m_header; | 
| 452 |  |  | 
| 453 |  |     // BDB overloads some page fields to store overflow page data | 
| 454 |  |     // hf_offset contains the length of the overflow data stored on this page | 
| 455 |  |     // entries contains a reference count for references to this item | 
| 456 |  |  | 
| 457 |  |     // The overflow data itself. Begins immediately following header | 
| 458 |  |     std::vector<std::byte> data; | 
| 459 |  |  | 
| 460 |  |     template <typename Stream> | 
| 461 |  |     void Unserialize(Stream& s) | 
| 462 | 0 |     { | 
| 463 | 0 |         data.resize(m_header.hf_offset); | 
| 464 | 0 |         s.read(std::as_writable_bytes(std::span(data.data(), data.size()))); | 
| 465 | 0 |     } | 
| 466 |  | }; | 
| 467 |  |  | 
| 468 |  | /** A page of records in the database */ | 
| 469 |  | class InternalPage | 
| 470 |  | { | 
| 471 |  | public: | 
| 472 | 0 |     InternalPage(const PageHeader& header) : m_header(header) {} | 
| 473 |  |     InternalPage() = delete; | 
| 474 |  |  | 
| 475 |  |     PageHeader m_header; | 
| 476 |  |  | 
| 477 |  |     std::vector<uint16_t> indexes; | 
| 478 |  |     std::vector<InternalRecord> records; | 
| 479 |  |  | 
| 480 |  |     template <typename Stream> | 
| 481 |  |     void Unserialize(Stream& s) | 
| 482 | 0 |     { | 
| 483 |  |         // Current position within the page | 
| 484 | 0 |         int64_t pos = PageHeader::SIZE; | 
| 485 |  |  | 
| 486 |  |         // Get the items | 
| 487 | 0 |         for (uint32_t i = 0; i < m_header.entries; ++i) { | 
| 488 |  |             // Get the index | 
| 489 | 0 |             uint16_t index; | 
| 490 | 0 |             s >> index; | 
| 491 | 0 |             if (m_header.other_endian) { | 
| 492 | 0 |                 index = internal_bswap_16(index); | 
| 493 | 0 |             } | 
| 494 | 0 |             indexes.push_back(index); | 
| 495 | 0 |             pos += sizeof(uint16_t); | 
| 496 |  |  | 
| 497 |  |             // Go to the offset from the index | 
| 498 | 0 |             int64_t to_jump = index - pos; | 
| 499 | 0 |             if (to_jump < 0) { | 
| 500 | 0 |                 throw std::runtime_error("Internal record position not in page"); | 
| 501 | 0 |             } | 
| 502 | 0 |             s.ignore(to_jump); | 
| 503 |  |  | 
| 504 |  |             // Read the record | 
| 505 | 0 |             RecordHeader rec_hdr(m_header.other_endian); | 
| 506 | 0 |             s >> rec_hdr; | 
| 507 | 0 |             to_jump += RecordHeader::SIZE; | 
| 508 |  | 
 | 
| 509 | 0 |             if (rec_hdr.type != RecordType::KEYDATA) { | 
| 510 | 0 |                 throw std::runtime_error("Unknown record type in internal page"); | 
| 511 | 0 |             } | 
| 512 | 0 |             InternalRecord record(rec_hdr); | 
| 513 | 0 |             s >> record; | 
| 514 | 0 |             records.emplace_back(record); | 
| 515 | 0 |             to_jump += InternalRecord::FIXED_SIZE + rec_hdr.len; | 
| 516 |  |  | 
| 517 |  |             // Go back to the indexes | 
| 518 | 0 |             s.seek(-to_jump, SEEK_CUR); | 
| 519 | 0 |         } | 
| 520 | 0 |     } | 
| 521 |  | }; | 
| 522 |  |  | 
| 523 |  | static void SeekToPage(AutoFile& s, uint32_t page_num, uint32_t page_size) | 
| 524 | 0 | { | 
| 525 | 0 |     int64_t pos = int64_t{page_num} * page_size; | 
| 526 | 0 |     s.seek(pos, SEEK_SET); | 
| 527 | 0 | } | 
| 528 |  |  | 
| 529 |  | void BerkeleyRODatabase::Open() | 
| 530 | 0 | { | 
| 531 |  |     // Open the file | 
| 532 | 0 |     FILE* file = fsbridge::fopen(m_filepath, "rb"); | 
| 533 | 0 |     AutoFile db_file(file); | 
| 534 | 0 |     if (db_file.IsNull()) { | 
| 535 | 0 |         throw std::runtime_error("BerkeleyRODatabase: Failed to open database file"); | 
| 536 | 0 |     } | 
| 537 |  |  | 
| 538 | 0 |     uint32_t page_size = 4096; // Default page size | 
| 539 |  |  | 
| 540 |  |     // Read the outer metapage | 
| 541 |  |     // Expected page number is 0 | 
| 542 | 0 |     MetaPage outer_meta(0); | 
| 543 | 0 |     db_file >> outer_meta; | 
| 544 | 0 |     page_size = outer_meta.pagesize; | 
| 545 |  |  | 
| 546 |  |     // Verify the size of the file is a multiple of the page size | 
| 547 | 0 |     db_file.seek(0, SEEK_END); | 
| 548 | 0 |     int64_t size = db_file.tell(); | 
| 549 |  |  | 
| 550 |  |     // Since BDB stores everything in a page, the file size should be a multiple of the page size; | 
| 551 |  |     // However, BDB doesn't actually check that this is the case, and enforcing this check results | 
| 552 |  |     // in us rejecting a database that BDB would not, so this check needs to be excluded. | 
| 553 |  |     // This is left commented out as a reminder to not accidentally implement this in the future. | 
| 554 |  |     // if (size % page_size != 0) { | 
| 555 |  |     //     throw std::runtime_error("File size is not a multiple of page size"); | 
| 556 |  |     // } | 
| 557 |  |  | 
| 558 |  |     // Check the last page number | 
| 559 | 0 |     uint32_t expected_last_page{uint32_t((size / page_size) - 1)}; | 
| 560 | 0 |     if (outer_meta.last_page != expected_last_page) { | 
| 561 | 0 |         throw std::runtime_error("Last page number could not fit in file"); | 
| 562 | 0 |     } | 
| 563 |  |  | 
| 564 |  |     // Make sure encryption is disabled | 
| 565 | 0 |     if (outer_meta.encrypt_algo != 0) { | 
| 566 | 0 |         throw std::runtime_error("BDB builtin encryption is not supported"); | 
| 567 | 0 |     } | 
| 568 |  |  | 
| 569 |  |     // Check all Log Sequence Numbers (LSN) point to file 0 and offset 1 which indicates that the LSNs were | 
| 570 |  |     // reset and that the log files are not necessary to get all of the data in the database. | 
| 571 | 0 |     for (uint32_t i = 0; i < outer_meta.last_page; ++i) { | 
| 572 |  |         // The LSN is composed of 2 32-bit ints, the first is a file id, the second an offset | 
| 573 |  |         // It will always be the first 8 bytes of a page, so we deserialize it directly for every page | 
| 574 | 0 |         uint32_t file; | 
| 575 | 0 |         uint32_t offset; | 
| 576 | 0 |         SeekToPage(db_file, i, page_size); | 
| 577 | 0 |         db_file >> file >> offset; | 
| 578 | 0 |         if (outer_meta.other_endian) { | 
| 579 | 0 |             file = internal_bswap_32(file); | 
| 580 | 0 |             offset = internal_bswap_32(offset); | 
| 581 | 0 |         } | 
| 582 | 0 |         if (file != 0 || offset != 1) { | 
| 583 | 0 |             throw std::runtime_error("LSNs are not reset, this database is not completely flushed. Please reopen then close the database with a version that has BDB support"); | 
| 584 | 0 |         } | 
| 585 | 0 |     } | 
| 586 |  |  | 
| 587 |  |     // Read the root page | 
| 588 | 0 |     SeekToPage(db_file, outer_meta.root, page_size); | 
| 589 | 0 |     PageHeader header(outer_meta.root, outer_meta.other_endian); | 
| 590 | 0 |     db_file >> header; | 
| 591 | 0 |     if (header.type != PageType::BTREE_LEAF) { | 
| 592 | 0 |         throw std::runtime_error("Unexpected outer database root page type"); | 
| 593 | 0 |     } | 
| 594 | 0 |     if (header.entries != 2) { | 
| 595 | 0 |         throw std::runtime_error("Unexpected number of entries in outer database root page"); | 
| 596 | 0 |     } | 
| 597 | 0 |     RecordsPage page(header); | 
| 598 | 0 |     db_file >> page; | 
| 599 |  |  | 
| 600 |  |     // First record should be the string "main" | 
| 601 | 0 |     if (!std::holds_alternative<DataRecord>(page.records.at(0)) || std::get<DataRecord>(page.records.at(0)).data != SUBDATABASE_NAME) { | 
| 602 | 0 |         throw std::runtime_error("Subdatabase has an unexpected name"); | 
| 603 | 0 |     } | 
| 604 |  |     // Check length of page number for subdatabase location | 
| 605 | 0 |     if (!std::holds_alternative<DataRecord>(page.records.at(1)) || std::get<DataRecord>(page.records.at(1)).m_header.len != 4) { | 
| 606 | 0 |         throw std::runtime_error("Subdatabase page number has unexpected length"); | 
| 607 | 0 |     } | 
| 608 |  |  | 
| 609 |  |     // Read subdatabase page number | 
| 610 |  |     // It is written as a big endian 32 bit number | 
| 611 | 0 |     uint32_t main_db_page = ReadBE32(std::get<DataRecord>(page.records.at(1)).data.data()); | 
| 612 |  |  | 
| 613 |  |     // The main database is in a page that doesn't exist | 
| 614 | 0 |     if (main_db_page > outer_meta.last_page) { | 
| 615 | 0 |         throw std::runtime_error("Page number is greater than database last page"); | 
| 616 | 0 |     } | 
| 617 |  |  | 
| 618 |  |     // Read the inner metapage | 
| 619 | 0 |     SeekToPage(db_file, main_db_page, page_size); | 
| 620 | 0 |     MetaPage inner_meta(main_db_page); | 
| 621 | 0 |     db_file >> inner_meta; | 
| 622 |  | 
 | 
| 623 | 0 |     if (inner_meta.pagesize != page_size) { | 
| 624 | 0 |         throw std::runtime_error("Unexpected page size"); | 
| 625 | 0 |     } | 
| 626 |  |  | 
| 627 | 0 |     if (inner_meta.last_page > outer_meta.last_page) { | 
| 628 | 0 |         throw std::runtime_error("Subdatabase last page is greater than database last page"); | 
| 629 | 0 |     } | 
| 630 |  |  | 
| 631 |  |     // Make sure encryption is disabled | 
| 632 | 0 |     if (inner_meta.encrypt_algo != 0) { | 
| 633 | 0 |         throw std::runtime_error("BDB builtin encryption is not supported"); | 
| 634 | 0 |     } | 
| 635 |  |  | 
| 636 |  |     // Do a DFS through the BTree, starting at root | 
| 637 | 0 |     std::vector<uint32_t> pages{inner_meta.root}; | 
| 638 | 0 |     while (pages.size() > 0) { | 
| 639 | 0 |         uint32_t curr_page = pages.back(); | 
| 640 |  |         // It turns out BDB completely ignores this last_page field and doesn't actually update it to the correct | 
| 641 |  |         // last page. While we should be checking this, we can't. | 
| 642 |  |         // This is left commented out as a reminder to not accidentally implement this in the future. | 
| 643 |  |         // if (curr_page > inner_meta.last_page) { | 
| 644 |  |         //     throw std::runtime_error("Page number is greater than subdatabase last page"); | 
| 645 |  |         // } | 
| 646 | 0 |         pages.pop_back(); | 
| 647 | 0 |         SeekToPage(db_file, curr_page, page_size); | 
| 648 | 0 |         PageHeader header(curr_page, inner_meta.other_endian); | 
| 649 | 0 |         db_file >> header; | 
| 650 | 0 |         switch (header.type) { | 
| 651 | 0 |         case PageType::BTREE_INTERNAL: { | 
| 652 | 0 |             InternalPage int_page(header); | 
| 653 | 0 |             db_file >> int_page; | 
| 654 | 0 |             for (const InternalRecord& rec : int_page.records) { | 
| 655 | 0 |                 if (rec.m_header.deleted) continue; | 
| 656 | 0 |                 pages.push_back(rec.page_num); | 
| 657 | 0 |             } | 
| 658 | 0 |             break; | 
| 659 | 0 |         } | 
| 660 | 0 |         case PageType::BTREE_LEAF: { | 
| 661 | 0 |             RecordsPage rec_page(header); | 
| 662 | 0 |             db_file >> rec_page; | 
| 663 | 0 |             if (rec_page.records.size() % 2 != 0) { | 
| 664 |  |                 // BDB stores key value pairs in consecutive records, thus an odd number of records is unexpected | 
| 665 | 0 |                 throw std::runtime_error("Records page has odd number of records"); | 
| 666 | 0 |             } | 
| 667 | 0 |             bool is_key = true; | 
| 668 | 0 |             std::vector<std::byte> key; | 
| 669 | 0 |             for (const std::variant<DataRecord, OverflowRecord>& rec : rec_page.records) { | 
| 670 | 0 |                 std::vector<std::byte> data; | 
| 671 | 0 |                 if (const DataRecord* drec = std::get_if<DataRecord>(&rec)) { | 
| 672 | 0 |                     if (drec->m_header.deleted) continue; | 
| 673 | 0 |                     data = drec->data; | 
| 674 | 0 |                 } else if (const OverflowRecord* orec = std::get_if<OverflowRecord>(&rec)) { | 
| 675 | 0 |                     if (orec->m_header.deleted) continue; | 
| 676 | 0 |                     uint32_t next_page = orec->page_number; | 
| 677 | 0 |                     while (next_page != 0) { | 
| 678 | 0 |                         SeekToPage(db_file, next_page, page_size); | 
| 679 | 0 |                         PageHeader opage_header(next_page, inner_meta.other_endian); | 
| 680 | 0 |                         db_file >> opage_header; | 
| 681 | 0 |                         if (opage_header.type != PageType::OVERFLOW_DATA) { | 
| 682 | 0 |                             throw std::runtime_error("Bad overflow record page type"); | 
| 683 | 0 |                         } | 
| 684 | 0 |                         OverflowPage opage(opage_header); | 
| 685 | 0 |                         db_file >> opage; | 
| 686 | 0 |                         data.insert(data.end(), opage.data.begin(), opage.data.end()); | 
| 687 | 0 |                         next_page = opage_header.next_page; | 
| 688 | 0 |                     } | 
| 689 | 0 |                 } | 
| 690 |  |  | 
| 691 | 0 |                 if (is_key) { | 
| 692 | 0 |                     key = data; | 
| 693 | 0 |                 } else { | 
| 694 | 0 |                     m_records.emplace(SerializeData{key.begin(), key.end()}, SerializeData{data.begin(), data.end()}); | 
| 695 | 0 |                     key.clear(); | 
| 696 | 0 |                 } | 
| 697 | 0 |                 is_key = !is_key; | 
| 698 | 0 |             } | 
| 699 | 0 |             break; | 
| 700 | 0 |         } | 
| 701 | 0 |         default: | 
| 702 | 0 |             throw std::runtime_error("Unexpected page type"); | 
| 703 | 0 |         } | 
| 704 | 0 |     } | 
| 705 | 0 | } | 
| 706 |  |  | 
| 707 |  | std::unique_ptr<DatabaseBatch> BerkeleyRODatabase::MakeBatch() | 
| 708 | 0 | { | 
| 709 | 0 |     return std::make_unique<BerkeleyROBatch>(*this); | 
| 710 | 0 | } | 
| 711 |  |  | 
| 712 |  | bool BerkeleyRODatabase::Backup(const std::string& dest) const | 
| 713 | 0 | { | 
| 714 | 0 |     fs::path src(m_filepath); | 
| 715 | 0 |     fs::path dst(fs::PathFromString(dest)); | 
| 716 |  | 
 | 
| 717 | 0 |     if (fs::is_directory(dst)) { | 
| 718 | 0 |         dst = BDBDataFile(dst); | 
| 719 | 0 |     } | 
| 720 | 0 |     try { | 
| 721 | 0 |         if (fs::exists(dst) && fs::equivalent(src, dst)) { | 
| 722 | 0 |             LogPrintf("cannot backup to wallet source file %s\n", fs::PathToString(dst));| Line | Count | Source |  | 361 | 0 | #define LogPrintf(...) LogInfo(__VA_ARGS__) | Line | Count | Source |  | 356 | 0 | #define LogInfo(...) LogPrintLevel_(BCLog::LogFlags::ALL, BCLog::Level::Info, /*should_ratelimit=*/true, __VA_ARGS__) | Line | Count | Source |  | 350 | 0 | #define LogPrintLevel_(category, level, should_ratelimit, ...) LogPrintFormatInternal(std::source_location::current(), category, level, should_ratelimit, __VA_ARGS__) | 
 | 
 | 
 | 
| 723 | 0 |             return false; | 
| 724 | 0 |         } | 
| 725 |  |  | 
| 726 | 0 |         fs::copy_file(src, dst, fs::copy_options::overwrite_existing); | 
| 727 | 0 |         LogPrintf("copied %s to %s\n", fs::PathToString(m_filepath), fs::PathToString(dst));| Line | Count | Source |  | 361 | 0 | #define LogPrintf(...) LogInfo(__VA_ARGS__) | Line | Count | Source |  | 356 | 0 | #define LogInfo(...) LogPrintLevel_(BCLog::LogFlags::ALL, BCLog::Level::Info, /*should_ratelimit=*/true, __VA_ARGS__) | Line | Count | Source |  | 350 | 0 | #define LogPrintLevel_(category, level, should_ratelimit, ...) LogPrintFormatInternal(std::source_location::current(), category, level, should_ratelimit, __VA_ARGS__) | 
 | 
 | 
 | 
| 728 | 0 |         return true; | 
| 729 | 0 |     } catch (const fs::filesystem_error& e) { | 
| 730 | 0 |         LogWarning("error copying %s to %s - %s\n", fs::PathToString(m_filepath), fs::PathToString(dst), e.code().message());| Line | Count | Source |  | 357 | 0 | #define LogWarning(...) LogPrintLevel_(BCLog::LogFlags::ALL, BCLog::Level::Warning, /*should_ratelimit=*/true, __VA_ARGS__) | Line | Count | Source |  | 350 | 0 | #define LogPrintLevel_(category, level, should_ratelimit, ...) LogPrintFormatInternal(std::source_location::current(), category, level, should_ratelimit, __VA_ARGS__) | 
 | 
 | 
| 731 | 0 |         return false; | 
| 732 | 0 |     } | 
| 733 | 0 | } | 
| 734 |  |  | 
| 735 |  | bool BerkeleyROBatch::ReadKey(DataStream&& key, DataStream& value) | 
| 736 | 0 | { | 
| 737 | 0 |     SerializeData key_data{key.begin(), key.end()}; | 
| 738 | 0 |     const auto it{m_database.m_records.find(key_data)}; | 
| 739 | 0 |     if (it == m_database.m_records.end()) { | 
| 740 | 0 |         return false; | 
| 741 | 0 |     } | 
| 742 | 0 |     auto val = it->second; | 
| 743 | 0 |     value.clear(); | 
| 744 | 0 |     value.write(std::span(val)); | 
| 745 | 0 |     return true; | 
| 746 | 0 | } | 
| 747 |  |  | 
| 748 |  | bool BerkeleyROBatch::HasKey(DataStream&& key) | 
| 749 | 0 | { | 
| 750 | 0 |     SerializeData key_data{key.begin(), key.end()}; | 
| 751 | 0 |     return m_database.m_records.count(key_data) > 0; | 
| 752 | 0 | } | 
| 753 |  |  | 
| 754 |  | BerkeleyROCursor::BerkeleyROCursor(const BerkeleyRODatabase& database, std::span<const std::byte> prefix) | 
| 755 | 0 |     : m_database(database) | 
| 756 | 0 | { | 
| 757 | 0 |     std::tie(m_cursor, m_cursor_end) = m_database.m_records.equal_range(BytePrefix{prefix}); | 
| 758 | 0 | } | 
| 759 |  |  | 
| 760 |  | DatabaseCursor::Status BerkeleyROCursor::Next(DataStream& ssKey, DataStream& ssValue) | 
| 761 | 0 | { | 
| 762 | 0 |     if (m_cursor == m_cursor_end) { | 
| 763 | 0 |         return DatabaseCursor::Status::DONE; | 
| 764 | 0 |     } | 
| 765 | 0 |     ssKey.write(std::span(m_cursor->first)); | 
| 766 | 0 |     ssValue.write(std::span(m_cursor->second)); | 
| 767 | 0 |     m_cursor++; | 
| 768 | 0 |     return DatabaseCursor::Status::MORE; | 
| 769 | 0 | } | 
| 770 |  |  | 
| 771 |  | std::unique_ptr<DatabaseCursor> BerkeleyROBatch::GetNewPrefixCursor(std::span<const std::byte> prefix) | 
| 772 | 0 | { | 
| 773 | 0 |     return std::make_unique<BerkeleyROCursor>(m_database, prefix); | 
| 774 | 0 | } | 
| 775 |  |  | 
| 776 |  | std::unique_ptr<BerkeleyRODatabase> MakeBerkeleyRODatabase(const fs::path& path, const DatabaseOptions& options, DatabaseStatus& status, bilingual_str& error) | 
| 777 | 0 | { | 
| 778 | 0 |     fs::path data_file = BDBDataFile(path); | 
| 779 | 0 |     try { | 
| 780 | 0 |         std::unique_ptr<BerkeleyRODatabase> db = std::make_unique<BerkeleyRODatabase>(data_file); | 
| 781 | 0 |         status = DatabaseStatus::SUCCESS; | 
| 782 | 0 |         return db; | 
| 783 | 0 |     } catch (const std::runtime_error& e) { | 
| 784 | 0 |         error.original = e.what(); | 
| 785 | 0 |         status = DatabaseStatus::FAILED_LOAD; | 
| 786 | 0 |         return nullptr; | 
| 787 | 0 |     } | 
| 788 | 0 | } | 
| 789 |  | } // namespace wallet |