/Users/eugenesiegel/btc/bitcoin/src/util/strencodings.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | // Copyright (c) 2009-2010 Satoshi Nakamoto |
2 | | // Copyright (c) 2009-present The Bitcoin Core developers |
3 | | // Distributed under the MIT software license, see the accompanying |
4 | | // file COPYING or http://www.opensource.org/licenses/mit-license.php. |
5 | | |
6 | | #include <util/strencodings.h> |
7 | | |
8 | | #include <crypto/hex_base.h> |
9 | | #include <span.h> |
10 | | |
11 | | #include <array> |
12 | | #include <cassert> |
13 | | #include <cstring> |
14 | | #include <limits> |
15 | | #include <optional> |
16 | | #include <ostream> |
17 | | #include <string> |
18 | | #include <vector> |
19 | | |
/** ASCII letters (both cases) followed by the decimal digits. */
static const std::string CHARS_ALPHA_NUM{"abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789"};

/**
 * Allow-lists consulted by SanitizeString(); the array index is the `rule`
 * argument passed to that function. Every entry starts with CHARS_ALPHA_NUM
 * and appends the extra punctuation permitted for that rule.
 */
static const std::string SAFE_CHARS[] = {
    // SAFE_CHARS_DEFAULT
    CHARS_ALPHA_NUM + " .,;-_/:?@()",
    // SAFE_CHARS_UA_COMMENT
    CHARS_ALPHA_NUM + " .,;-_?@",
    // SAFE_CHARS_FILENAME
    CHARS_ALPHA_NUM + ".-_",
    // SAFE_CHARS_URI
    CHARS_ALPHA_NUM + "!*'();:@&=+$,/?#[]-_.~%",
};
29 | | |
30 | | std::string SanitizeString(std::string_view str, int rule) |
31 | 109k | { |
32 | 109k | std::string result; |
33 | 109k | for (char c : str) { |
34 | 0 | if (SAFE_CHARS[rule].find(c) != std::string::npos) { |
35 | 0 | result.push_back(c); |
36 | 0 | } |
37 | 0 | } |
38 | 109k | return result; |
39 | 109k | } |
40 | | |
41 | | bool IsHex(std::string_view str) |
42 | 0 | { |
43 | 0 | for (char c : str) { |
44 | 0 | if (HexDigit(c) < 0) return false; |
45 | 0 | } |
46 | 0 | return (str.size() > 0) && (str.size()%2 == 0); |
47 | 0 | } |
48 | | |
49 | | template <typename Byte> |
50 | | std::optional<std::vector<Byte>> TryParseHex(std::string_view str) |
51 | 0 | { |
52 | 0 | std::vector<Byte> vch; |
53 | 0 | vch.reserve(str.size() / 2); // two hex characters form a single byte |
54 | |
|
55 | 0 | auto it = str.begin(); |
56 | 0 | while (it != str.end()) { |
57 | 0 | if (IsSpace(*it)) { |
58 | 0 | ++it; |
59 | 0 | continue; |
60 | 0 | } |
61 | 0 | auto c1 = HexDigit(*(it++)); |
62 | 0 | if (it == str.end()) return std::nullopt; |
63 | 0 | auto c2 = HexDigit(*(it++)); |
64 | 0 | if (c1 < 0 || c2 < 0) return std::nullopt; |
65 | 0 | vch.push_back(Byte(c1 << 4) | Byte(c2)); |
66 | 0 | } |
67 | 0 | return vch; |
68 | 0 | } Unexecuted instantiation: _Z11TryParseHexISt4byteENSt3__18optionalINS1_6vectorIT_NS1_9allocatorIS4_EEEEEENS1_17basic_string_viewIcNS1_11char_traitsIcEEEE Unexecuted instantiation: _Z11TryParseHexIhENSt3__18optionalINS0_6vectorIT_NS0_9allocatorIS3_EEEEEENS0_17basic_string_viewIcNS0_11char_traitsIcEEEE |
69 | | template std::optional<std::vector<std::byte>> TryParseHex(std::string_view); |
70 | | template std::optional<std::vector<uint8_t>> TryParseHex(std::string_view); |
71 | | |
72 | | bool SplitHostPort(std::string_view in, uint16_t& portOut, std::string& hostOut) |
73 | 0 | { |
74 | 0 | bool valid = false; |
75 | 0 | size_t colon = in.find_last_of(':'); |
76 | | // if a : is found, and it either follows a [...], or no other : is in the string, treat it as port separator |
77 | 0 | bool fHaveColon = colon != in.npos; |
78 | 0 | bool fBracketed = fHaveColon && (in[0] == '[' && in[colon - 1] == ']'); // if there is a colon, and in[0]=='[', colon is not 0, so in[colon-1] is safe |
79 | 0 | bool fMultiColon{fHaveColon && colon != 0 && (in.find_last_of(':', colon - 1) != in.npos)}; |
80 | 0 | if (fHaveColon && (colon == 0 || fBracketed || !fMultiColon)) { |
81 | 0 | if (const auto n{ToIntegral<uint16_t>(in.substr(colon + 1))}) { |
82 | 0 | in = in.substr(0, colon); |
83 | 0 | portOut = *n; |
84 | 0 | valid = (portOut != 0); |
85 | 0 | } |
86 | 0 | } else { |
87 | 0 | valid = true; |
88 | 0 | } |
89 | 0 | if (in.size() > 0 && in[0] == '[' && in[in.size() - 1] == ']') { |
90 | 0 | hostOut = in.substr(1, in.size() - 2); |
91 | 0 | } else { |
92 | 0 | hostOut = in; |
93 | 0 | } |
94 | |
|
95 | 0 | return valid; |
96 | 0 | } |
97 | | |
98 | | std::string EncodeBase64(std::span<const unsigned char> input) |
99 | 0 | { |
100 | 0 | static const char *pbase64 = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"; |
101 | |
|
102 | 0 | std::string str; |
103 | 0 | str.reserve(((input.size() + 2) / 3) * 4); |
104 | 0 | ConvertBits<8, 6, true>([&](int v) { str += pbase64[v]; }, input.begin(), input.end()); |
105 | 0 | while (str.size() % 4) str += '='; |
106 | 0 | return str; |
107 | 0 | } |
108 | | |
109 | | std::optional<std::vector<unsigned char>> DecodeBase64(std::string_view str) |
110 | 0 | { |
111 | 0 | static const int8_t decode64_table[256]{ |
112 | 0 | -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, |
113 | 0 | -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, |
114 | 0 | -1, -1, -1, 62, -1, -1, -1, 63, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, -1, -1, |
115 | 0 | -1, -1, -1, -1, -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, |
116 | 0 | 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -1, -1, -1, -1, -1, 26, 27, 28, |
117 | 0 | 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, |
118 | 0 | 49, 50, 51, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, |
119 | 0 | -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, |
120 | 0 | -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, |
121 | 0 | -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, |
122 | 0 | -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, |
123 | 0 | -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, |
124 | 0 | -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 |
125 | 0 | }; |
126 | |
|
127 | 0 | if (str.size() % 4 != 0) return {}; |
128 | | /* One or two = characters at the end are permitted. */ |
129 | 0 | if (str.size() >= 1 && str.back() == '=') str.remove_suffix(1); |
130 | 0 | if (str.size() >= 1 && str.back() == '=') str.remove_suffix(1); |
131 | |
|
132 | 0 | std::vector<unsigned char> ret; |
133 | 0 | ret.reserve((str.size() * 3) / 4); |
134 | 0 | bool valid = ConvertBits<6, 8, false>( |
135 | 0 | [&](unsigned char c) { ret.push_back(c); }, |
136 | 0 | str.begin(), str.end(), |
137 | 0 | [](char c) { return decode64_table[uint8_t(c)]; } |
138 | 0 | ); |
139 | 0 | if (!valid) return {}; |
140 | | |
141 | 0 | return ret; |
142 | 0 | } |
143 | | |
144 | | std::string EncodeBase32(std::span<const unsigned char> input, bool pad) |
145 | 1.29k | { |
146 | 1.29k | static const char *pbase32 = "abcdefghijklmnopqrstuvwxyz234567"; |
147 | | |
148 | 1.29k | std::string str; |
149 | 1.29k | str.reserve(((input.size() + 4) / 5) * 8); |
150 | 62.5k | ConvertBits<8, 5, true>([&](int v) { str += pbase32[v]; }, input.begin(), input.end()); |
151 | 1.29k | if (pad) { |
152 | 432 | while (str.size() % 8) { |
153 | 0 | str += '='; |
154 | 0 | } |
155 | 432 | } |
156 | 1.29k | return str; |
157 | 1.29k | } |
158 | | |
159 | | std::string EncodeBase32(std::string_view str, bool pad) |
160 | 0 | { |
161 | 0 | return EncodeBase32(MakeUCharSpan(str), pad); |
162 | 0 | } |
163 | | |
164 | | std::optional<std::vector<unsigned char>> DecodeBase32(std::string_view str) |
165 | 0 | { |
166 | 0 | static const int8_t decode32_table[256]{ |
167 | 0 | -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, |
168 | 0 | -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, |
169 | 0 | -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 26, 27, 28, 29, 30, 31, -1, -1, -1, -1, |
170 | 0 | -1, -1, -1, -1, -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, |
171 | 0 | 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -1, -1, -1, -1, -1, 0, 1, 2, |
172 | 0 | 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, |
173 | 0 | 23, 24, 25, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, |
174 | 0 | -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, |
175 | 0 | -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, |
176 | 0 | -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, |
177 | 0 | -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, |
178 | 0 | -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, |
179 | 0 | -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 |
180 | 0 | }; |
181 | |
|
182 | 0 | if (str.size() % 8 != 0) return {}; |
183 | | /* 1, 3, 4, or 6 padding '=' suffix characters are permitted. */ |
184 | 0 | if (str.size() >= 1 && str.back() == '=') str.remove_suffix(1); |
185 | 0 | if (str.size() >= 2 && str.substr(str.size() - 2) == "==") str.remove_suffix(2); |
186 | 0 | if (str.size() >= 1 && str.back() == '=') str.remove_suffix(1); |
187 | 0 | if (str.size() >= 2 && str.substr(str.size() - 2) == "==") str.remove_suffix(2); |
188 | |
|
189 | 0 | std::vector<unsigned char> ret; |
190 | 0 | ret.reserve((str.size() * 5) / 8); |
191 | 0 | bool valid = ConvertBits<5, 8, false>( |
192 | 0 | [&](unsigned char c) { ret.push_back(c); }, |
193 | 0 | str.begin(), str.end(), |
194 | 0 | [](char c) { return decode32_table[uint8_t(c)]; } |
195 | 0 | ); |
196 | |
|
197 | 0 | if (!valid) return {}; |
198 | | |
199 | 0 | return ret; |
200 | 0 | } |
201 | | |
/**
 * Word-wrap `in` so that lines are broken at spaces or newlines to fit within
 * `width` columns where possible.
 *
 * Existing newlines are kept. When a line is broken at a space, that space is
 * replaced by a newline and the continuation line is prefixed with `indent`
 * spaces (which count against `width`). A word longer than the remaining
 * width is emitted unbroken.
 *
 * The result is accumulated directly in a std::string: no stream formatting
 * is needed for plain concatenation.
 *
 * @param in      Text to wrap.
 * @param width   Maximum line width; must be >= indent (asserted).
 * @param indent  Number of spaces to put in front of continuation lines.
 * @return The wrapped text.
 */
std::string FormatParagraph(std::string_view in, size_t width, size_t indent)
{
    assert(width >= indent);
    std::string out;
    out.reserve(in.size());
    size_t ptr = 0;      // start of the text not yet emitted
    size_t indented = 0; // columns already consumed by indentation on the current output line
    while (ptr < in.size()) {
        size_t lineend = in.find('\n', ptr);
        if (lineend == std::string_view::npos) {
            lineend = in.size();
        }
        const size_t linelen = lineend - ptr;
        const size_t rem_width = width - indented;
        if (linelen <= rem_width) {
            // The rest of this input line fits: copy it together with its
            // newline (substr clamps at the end of the input).
            out.append(in.substr(ptr, linelen + 1));
            ptr = lineend + 1;
            indented = 0;
            continue;
        }

        // Find the last break opportunity that still fits in the remaining width.
        size_t finalspace = in.find_last_of(" \n", ptr + rem_width);
        if (finalspace == std::string_view::npos || finalspace < ptr) {
            // No place to break; just include the entire word and move on
            finalspace = in.find_first_of("\n ", ptr);
            if (finalspace == std::string_view::npos) {
                // End of the string, just add it and break
                out.append(in.substr(ptr));
                break;
            }
        }
        out.append(in.substr(ptr, finalspace - ptr));
        out.push_back('\n');
        if (in[finalspace] == '\n') {
            // Broke at a newline already present in the input: a new
            // paragraph line starts, so no continuation indent.
            indented = 0;
        } else if (indent) {
            out.append(indent, ' ');
            indented = indent;
        }
        ptr = finalspace + 1;
    }
    return out;
}
243 | | |
/** Upper bound for mantissa.
 * 10^18-1 is the largest arbitrary decimal that will fit in a signed 64-bit integer.
 * Larger integers cannot consist of arbitrary combinations of 0-9:
 *
 *   999999999999999999  10^18-1
 *  9223372036854775807  (1<<63)-1  (max int64_t)
 *  9999999999999999999  10^19-1    (would overflow)
 */
static constexpr int64_t UPPER_BOUND = 1000000000000000000LL - 1LL;
253 | | |
254 | | /** Helper function for ParseFixedPoint */ |
255 | | static inline bool ProcessMantissaDigit(char ch, int64_t &mantissa, int &mantissa_tzeros) |
256 | 0 | { |
257 | 0 | if(ch == '0') |
258 | 0 | ++mantissa_tzeros; |
259 | 0 | else { |
260 | 0 | for (int i=0; i<=mantissa_tzeros; ++i) { |
261 | 0 | if (mantissa > (UPPER_BOUND / 10LL)) |
262 | 0 | return false; /* overflow */ |
263 | 0 | mantissa *= 10; |
264 | 0 | } |
265 | 0 | mantissa += ch - '0'; |
266 | 0 | mantissa_tzeros = 0; |
267 | 0 | } |
268 | 0 | return true; |
269 | 0 | } |
270 | | |
271 | | bool ParseFixedPoint(std::string_view val, int decimals, int64_t *amount_out) |
272 | 0 | { |
273 | 0 | int64_t mantissa = 0; |
274 | 0 | int64_t exponent = 0; |
275 | 0 | int mantissa_tzeros = 0; |
276 | 0 | bool mantissa_sign = false; |
277 | 0 | bool exponent_sign = false; |
278 | 0 | int ptr = 0; |
279 | 0 | int end = val.size(); |
280 | 0 | int point_ofs = 0; |
281 | |
|
282 | 0 | if (ptr < end && val[ptr] == '-') { |
283 | 0 | mantissa_sign = true; |
284 | 0 | ++ptr; |
285 | 0 | } |
286 | 0 | if (ptr < end) |
287 | 0 | { |
288 | 0 | if (val[ptr] == '0') { |
289 | | /* pass single 0 */ |
290 | 0 | ++ptr; |
291 | 0 | } else if (val[ptr] >= '1' && val[ptr] <= '9') { |
292 | 0 | while (ptr < end && IsDigit(val[ptr])) { |
293 | 0 | if (!ProcessMantissaDigit(val[ptr], mantissa, mantissa_tzeros)) |
294 | 0 | return false; /* overflow */ |
295 | 0 | ++ptr; |
296 | 0 | } |
297 | 0 | } else return false; /* missing expected digit */ |
298 | 0 | } else return false; /* empty string or loose '-' */ |
299 | 0 | if (ptr < end && val[ptr] == '.') |
300 | 0 | { |
301 | 0 | ++ptr; |
302 | 0 | if (ptr < end && IsDigit(val[ptr])) |
303 | 0 | { |
304 | 0 | while (ptr < end && IsDigit(val[ptr])) { |
305 | 0 | if (!ProcessMantissaDigit(val[ptr], mantissa, mantissa_tzeros)) |
306 | 0 | return false; /* overflow */ |
307 | 0 | ++ptr; |
308 | 0 | ++point_ofs; |
309 | 0 | } |
310 | 0 | } else return false; /* missing expected digit */ |
311 | 0 | } |
312 | 0 | if (ptr < end && (val[ptr] == 'e' || val[ptr] == 'E')) |
313 | 0 | { |
314 | 0 | ++ptr; |
315 | 0 | if (ptr < end && val[ptr] == '+') |
316 | 0 | ++ptr; |
317 | 0 | else if (ptr < end && val[ptr] == '-') { |
318 | 0 | exponent_sign = true; |
319 | 0 | ++ptr; |
320 | 0 | } |
321 | 0 | if (ptr < end && IsDigit(val[ptr])) { |
322 | 0 | while (ptr < end && IsDigit(val[ptr])) { |
323 | 0 | if (exponent > (UPPER_BOUND / 10LL)) |
324 | 0 | return false; /* overflow */ |
325 | 0 | exponent = exponent * 10 + val[ptr] - '0'; |
326 | 0 | ++ptr; |
327 | 0 | } |
328 | 0 | } else return false; /* missing expected digit */ |
329 | 0 | } |
330 | 0 | if (ptr != end) |
331 | 0 | return false; /* trailing garbage */ |
332 | | |
333 | | /* finalize exponent */ |
334 | 0 | if (exponent_sign) |
335 | 0 | exponent = -exponent; |
336 | 0 | exponent = exponent - point_ofs + mantissa_tzeros; |
337 | | |
338 | | /* finalize mantissa */ |
339 | 0 | if (mantissa_sign) |
340 | 0 | mantissa = -mantissa; |
341 | | |
342 | | /* convert to one 64-bit fixed-point value */ |
343 | 0 | exponent += decimals; |
344 | 0 | if (exponent < 0) |
345 | 0 | return false; /* cannot represent values smaller than 10^-decimals */ |
346 | 0 | if (exponent >= 18) |
347 | 0 | return false; /* cannot represent values larger than or equal to 10^(18-decimals) */ |
348 | | |
349 | 0 | for (int i=0; i < exponent; ++i) { |
350 | 0 | if (mantissa > (UPPER_BOUND / 10LL) || mantissa < -(UPPER_BOUND / 10LL)) |
351 | 0 | return false; /* overflow */ |
352 | 0 | mantissa *= 10; |
353 | 0 | } |
354 | 0 | if (mantissa > UPPER_BOUND || mantissa < -UPPER_BOUND) |
355 | 0 | return false; /* overflow */ |
356 | | |
357 | 0 | if (amount_out) |
358 | 0 | *amount_out = mantissa; |
359 | |
|
360 | 0 | return true; |
361 | 0 | } |
362 | | |
363 | | std::string ToLower(std::string_view str) |
364 | 0 | { |
365 | 0 | std::string r; |
366 | 0 | r.reserve(str.size()); |
367 | 0 | for (auto ch : str) r += ToLower(ch); |
368 | 0 | return r; |
369 | 0 | } |
370 | | |
371 | | std::string ToUpper(std::string_view str) |
372 | 0 | { |
373 | 0 | std::string r; |
374 | 0 | r.reserve(str.size()); |
375 | 0 | for (auto ch : str) r += ToUpper(ch); |
376 | 0 | return r; |
377 | 0 | } |
378 | | |
379 | | std::string Capitalize(std::string str) |
380 | 0 | { |
381 | 0 | if (str.empty()) return str; |
382 | 0 | str[0] = ToUpper(str.front()); |
383 | 0 | return str; |
384 | 0 | } |
385 | | |
386 | | std::optional<uint64_t> ParseByteUnits(std::string_view str, ByteUnit default_multiplier) |
387 | 0 | { |
388 | 0 | if (str.empty()) { |
389 | 0 | return std::nullopt; |
390 | 0 | } |
391 | 0 | auto multiplier = default_multiplier; |
392 | 0 | char unit = str.back(); |
393 | 0 | switch (unit) { |
394 | 0 | case 'k': |
395 | 0 | multiplier = ByteUnit::k; |
396 | 0 | break; |
397 | 0 | case 'K': |
398 | 0 | multiplier = ByteUnit::K; |
399 | 0 | break; |
400 | 0 | case 'm': |
401 | 0 | multiplier = ByteUnit::m; |
402 | 0 | break; |
403 | 0 | case 'M': |
404 | 0 | multiplier = ByteUnit::M; |
405 | 0 | break; |
406 | 0 | case 'g': |
407 | 0 | multiplier = ByteUnit::g; |
408 | 0 | break; |
409 | 0 | case 'G': |
410 | 0 | multiplier = ByteUnit::G; |
411 | 0 | break; |
412 | 0 | case 't': |
413 | 0 | multiplier = ByteUnit::t; |
414 | 0 | break; |
415 | 0 | case 'T': |
416 | 0 | multiplier = ByteUnit::T; |
417 | 0 | break; |
418 | 0 | default: |
419 | 0 | unit = 0; |
420 | 0 | break; |
421 | 0 | } |
422 | | |
423 | 0 | uint64_t unit_amount = static_cast<uint64_t>(multiplier); |
424 | 0 | auto parsed_num = ToIntegral<uint64_t>(unit ? str.substr(0, str.size() - 1) : str); |
425 | 0 | if (!parsed_num || parsed_num > std::numeric_limits<uint64_t>::max() / unit_amount) { // check overflow |
426 | 0 | return std::nullopt; |
427 | 0 | } |
428 | 0 | return *parsed_num * unit_amount; |
429 | 0 | } |