input_adapters.hpp 19 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549
  1. // __ _____ _____ _____
  2. // __| | __| | | | JSON for Modern C++
  3. // | | |__ | | | | | | version 3.12.0
  4. // |_____|_____|_____|_|___| https://github.com/nlohmann/json
  5. //
  6. // SPDX-FileCopyrightText: 2013 - 2025 Niels Lohmann <https://nlohmann.me>
  7. // SPDX-License-Identifier: MIT
  8. #pragma once
  9. #include <array> // array
  10. #include <cstddef> // size_t
  11. #include <cstring> // strlen
  12. #include <iterator> // begin, end, iterator_traits, random_access_iterator_tag, distance, next
  13. #include <memory> // shared_ptr, make_shared, addressof
  14. #include <numeric> // accumulate
  15. #include <string> // string, char_traits
  16. #include <type_traits> // enable_if, is_base_of, is_pointer, is_integral, remove_pointer
  17. #include <utility> // pair, declval
  18. #ifndef JSON_NO_IO
  19. #include <cstdio> // FILE *
  20. #include <istream> // istream
  21. #endif // JSON_NO_IO
  22. #include <nlohmann/detail/exceptions.hpp>
  23. #include <nlohmann/detail/iterators/iterator_traits.hpp>
  24. #include <nlohmann/detail/macro_scope.hpp>
  25. #include <nlohmann/detail/meta/type_traits.hpp>
  26. NLOHMANN_JSON_NAMESPACE_BEGIN
  27. namespace detail
  28. {
  29. /// the supported input formats
  30. enum class input_format_t { json, cbor, msgpack, ubjson, bson, bjdata };
  31. ////////////////////
  32. // input adapters //
  33. ////////////////////
  34. #ifndef JSON_NO_IO
  35. /*!
  36. Input adapter for stdio file access. This adapter read only 1 byte and do not use any
  37. buffer. This adapter is a very low level adapter.
  38. */
  39. class file_input_adapter
  40. {
  41. public:
  42. using char_type = char;
  43. JSON_HEDLEY_NON_NULL(2)
  44. explicit file_input_adapter(std::FILE* f) noexcept
  45. : m_file(f)
  46. {
  47. JSON_ASSERT(m_file != nullptr);
  48. }
  49. // make class move-only
  50. file_input_adapter(const file_input_adapter&) = delete;
  51. file_input_adapter(file_input_adapter&&) noexcept = default;
  52. file_input_adapter& operator=(const file_input_adapter&) = delete;
  53. file_input_adapter& operator=(file_input_adapter&&) = delete;
  54. ~file_input_adapter() = default;
  55. std::char_traits<char>::int_type get_character() noexcept
  56. {
  57. return std::fgetc(m_file);
  58. }
  59. // returns the number of characters successfully read
  60. template<class T>
  61. std::size_t get_elements(T* dest, std::size_t count = 1)
  62. {
  63. return fread(dest, 1, sizeof(T) * count, m_file);
  64. }
  65. private:
  66. /// the file pointer to read from
  67. std::FILE* m_file;
  68. };
  69. /*!
  70. Input adapter for a (caching) istream. Ignores a UFT Byte Order Mark at
  71. beginning of input. Does not support changing the underlying std::streambuf
  72. in mid-input. Maintains underlying std::istream and std::streambuf to support
  73. subsequent use of standard std::istream operations to process any input
  74. characters following those used in parsing the JSON input. Clears the
  75. std::istream flags; any input errors (e.g., EOF) will be detected by the first
  76. subsequent call for input from the std::istream.
  77. */
  78. class input_stream_adapter
  79. {
  80. public:
  81. using char_type = char;
  82. ~input_stream_adapter()
  83. {
  84. // clear stream flags; we use underlying streambuf I/O, do not
  85. // maintain ifstream flags, except eof
  86. if (is != nullptr)
  87. {
  88. is->clear(is->rdstate() & std::ios::eofbit);
  89. }
  90. }
  91. explicit input_stream_adapter(std::istream& i)
  92. : is(&i), sb(i.rdbuf())
  93. {}
  94. // delete because of pointer members
  95. input_stream_adapter(const input_stream_adapter&) = delete;
  96. input_stream_adapter& operator=(input_stream_adapter&) = delete;
  97. input_stream_adapter& operator=(input_stream_adapter&&) = delete;
  98. input_stream_adapter(input_stream_adapter&& rhs) noexcept
  99. : is(rhs.is), sb(rhs.sb)
  100. {
  101. rhs.is = nullptr;
  102. rhs.sb = nullptr;
  103. }
  104. // std::istream/std::streambuf use std::char_traits<char>::to_int_type, to
  105. // ensure that std::char_traits<char>::eof() and the character 0xFF do not
  106. // end up as the same value, e.g. 0xFFFFFFFF.
  107. std::char_traits<char>::int_type get_character()
  108. {
  109. auto res = sb->sbumpc();
  110. // set eof manually, as we don't use the istream interface.
  111. if (JSON_HEDLEY_UNLIKELY(res == std::char_traits<char>::eof()))
  112. {
  113. is->clear(is->rdstate() | std::ios::eofbit);
  114. }
  115. return res;
  116. }
  117. template<class T>
  118. std::size_t get_elements(T* dest, std::size_t count = 1)
  119. {
  120. auto res = static_cast<std::size_t>(sb->sgetn(reinterpret_cast<char*>(dest), static_cast<std::streamsize>(count * sizeof(T))));
  121. if (JSON_HEDLEY_UNLIKELY(res < count * sizeof(T)))
  122. {
  123. is->clear(is->rdstate() | std::ios::eofbit);
  124. }
  125. return res;
  126. }
  127. private:
  128. /// the associated input stream
  129. std::istream* is = nullptr;
  130. std::streambuf* sb = nullptr;
  131. };
  132. #endif // JSON_NO_IO
  133. // General-purpose iterator-based adapter. It might not be as fast as
  134. // theoretically possible for some containers, but it is extremely versatile.
  135. template<typename IteratorType>
  136. class iterator_input_adapter
  137. {
  138. public:
  139. using char_type = typename std::iterator_traits<IteratorType>::value_type;
  140. iterator_input_adapter(IteratorType first, IteratorType last)
  141. : current(std::move(first)), end(std::move(last))
  142. {}
  143. typename char_traits<char_type>::int_type get_character()
  144. {
  145. if (JSON_HEDLEY_LIKELY(current != end))
  146. {
  147. auto result = char_traits<char_type>::to_int_type(*current);
  148. std::advance(current, 1);
  149. return result;
  150. }
  151. return char_traits<char_type>::eof();
  152. }
  153. // for general iterators, we cannot really do something better than falling back to processing the range one-by-one
  154. template<class T>
  155. std::size_t get_elements(T* dest, std::size_t count = 1)
  156. {
  157. auto* ptr = reinterpret_cast<char*>(dest);
  158. for (std::size_t read_index = 0; read_index < count * sizeof(T); ++read_index)
  159. {
  160. if (JSON_HEDLEY_LIKELY(current != end))
  161. {
  162. ptr[read_index] = static_cast<char>(*current);
  163. std::advance(current, 1);
  164. }
  165. else
  166. {
  167. return read_index;
  168. }
  169. }
  170. return count * sizeof(T);
  171. }
  172. private:
  173. IteratorType current;
  174. IteratorType end;
  175. template<typename BaseInputAdapter, size_t T>
  176. friend struct wide_string_input_helper;
  177. bool empty() const
  178. {
  179. return current == end;
  180. }
  181. };
  182. template<typename BaseInputAdapter, size_t T>
  183. struct wide_string_input_helper;
  184. template<typename BaseInputAdapter>
  185. struct wide_string_input_helper<BaseInputAdapter, 4>
  186. {
  187. // UTF-32
  188. static void fill_buffer(BaseInputAdapter& input,
  189. std::array<std::char_traits<char>::int_type, 4>& utf8_bytes,
  190. size_t& utf8_bytes_index,
  191. size_t& utf8_bytes_filled)
  192. {
  193. utf8_bytes_index = 0;
  194. if (JSON_HEDLEY_UNLIKELY(input.empty()))
  195. {
  196. utf8_bytes[0] = std::char_traits<char>::eof();
  197. utf8_bytes_filled = 1;
  198. }
  199. else
  200. {
  201. // get the current character
  202. const auto wc = input.get_character();
  203. // UTF-32 to UTF-8 encoding
  204. if (wc < 0x80)
  205. {
  206. utf8_bytes[0] = static_cast<std::char_traits<char>::int_type>(wc);
  207. utf8_bytes_filled = 1;
  208. }
  209. else if (wc <= 0x7FF)
  210. {
  211. utf8_bytes[0] = static_cast<std::char_traits<char>::int_type>(0xC0u | ((static_cast<unsigned int>(wc) >> 6u) & 0x1Fu));
  212. utf8_bytes[1] = static_cast<std::char_traits<char>::int_type>(0x80u | (static_cast<unsigned int>(wc) & 0x3Fu));
  213. utf8_bytes_filled = 2;
  214. }
  215. else if (wc <= 0xFFFF)
  216. {
  217. utf8_bytes[0] = static_cast<std::char_traits<char>::int_type>(0xE0u | ((static_cast<unsigned int>(wc) >> 12u) & 0x0Fu));
  218. utf8_bytes[1] = static_cast<std::char_traits<char>::int_type>(0x80u | ((static_cast<unsigned int>(wc) >> 6u) & 0x3Fu));
  219. utf8_bytes[2] = static_cast<std::char_traits<char>::int_type>(0x80u | (static_cast<unsigned int>(wc) & 0x3Fu));
  220. utf8_bytes_filled = 3;
  221. }
  222. else if (wc <= 0x10FFFF)
  223. {
  224. utf8_bytes[0] = static_cast<std::char_traits<char>::int_type>(0xF0u | ((static_cast<unsigned int>(wc) >> 18u) & 0x07u));
  225. utf8_bytes[1] = static_cast<std::char_traits<char>::int_type>(0x80u | ((static_cast<unsigned int>(wc) >> 12u) & 0x3Fu));
  226. utf8_bytes[2] = static_cast<std::char_traits<char>::int_type>(0x80u | ((static_cast<unsigned int>(wc) >> 6u) & 0x3Fu));
  227. utf8_bytes[3] = static_cast<std::char_traits<char>::int_type>(0x80u | (static_cast<unsigned int>(wc) & 0x3Fu));
  228. utf8_bytes_filled = 4;
  229. }
  230. else
  231. {
  232. // unknown character
  233. utf8_bytes[0] = static_cast<std::char_traits<char>::int_type>(wc);
  234. utf8_bytes_filled = 1;
  235. }
  236. }
  237. }
  238. };
  239. template<typename BaseInputAdapter>
  240. struct wide_string_input_helper<BaseInputAdapter, 2>
  241. {
  242. // UTF-16
  243. static void fill_buffer(BaseInputAdapter& input,
  244. std::array<std::char_traits<char>::int_type, 4>& utf8_bytes,
  245. size_t& utf8_bytes_index,
  246. size_t& utf8_bytes_filled)
  247. {
  248. utf8_bytes_index = 0;
  249. if (JSON_HEDLEY_UNLIKELY(input.empty()))
  250. {
  251. utf8_bytes[0] = std::char_traits<char>::eof();
  252. utf8_bytes_filled = 1;
  253. }
  254. else
  255. {
  256. // get the current character
  257. const auto wc = input.get_character();
  258. // UTF-16 to UTF-8 encoding
  259. if (wc < 0x80)
  260. {
  261. utf8_bytes[0] = static_cast<std::char_traits<char>::int_type>(wc);
  262. utf8_bytes_filled = 1;
  263. }
  264. else if (wc <= 0x7FF)
  265. {
  266. utf8_bytes[0] = static_cast<std::char_traits<char>::int_type>(0xC0u | ((static_cast<unsigned int>(wc) >> 6u)));
  267. utf8_bytes[1] = static_cast<std::char_traits<char>::int_type>(0x80u | (static_cast<unsigned int>(wc) & 0x3Fu));
  268. utf8_bytes_filled = 2;
  269. }
  270. else if (0xD800 > wc || wc >= 0xE000)
  271. {
  272. utf8_bytes[0] = static_cast<std::char_traits<char>::int_type>(0xE0u | ((static_cast<unsigned int>(wc) >> 12u)));
  273. utf8_bytes[1] = static_cast<std::char_traits<char>::int_type>(0x80u | ((static_cast<unsigned int>(wc) >> 6u) & 0x3Fu));
  274. utf8_bytes[2] = static_cast<std::char_traits<char>::int_type>(0x80u | (static_cast<unsigned int>(wc) & 0x3Fu));
  275. utf8_bytes_filled = 3;
  276. }
  277. else
  278. {
  279. if (JSON_HEDLEY_UNLIKELY(!input.empty()))
  280. {
  281. const auto wc2 = static_cast<unsigned int>(input.get_character());
  282. const auto charcode = 0x10000u + (((static_cast<unsigned int>(wc) & 0x3FFu) << 10u) | (wc2 & 0x3FFu));
  283. utf8_bytes[0] = static_cast<std::char_traits<char>::int_type>(0xF0u | (charcode >> 18u));
  284. utf8_bytes[1] = static_cast<std::char_traits<char>::int_type>(0x80u | ((charcode >> 12u) & 0x3Fu));
  285. utf8_bytes[2] = static_cast<std::char_traits<char>::int_type>(0x80u | ((charcode >> 6u) & 0x3Fu));
  286. utf8_bytes[3] = static_cast<std::char_traits<char>::int_type>(0x80u | (charcode & 0x3Fu));
  287. utf8_bytes_filled = 4;
  288. }
  289. else
  290. {
  291. utf8_bytes[0] = static_cast<std::char_traits<char>::int_type>(wc);
  292. utf8_bytes_filled = 1;
  293. }
  294. }
  295. }
  296. }
  297. };
  298. // Wraps another input adapter to convert wide character types into individual bytes.
  299. template<typename BaseInputAdapter, typename WideCharType>
  300. class wide_string_input_adapter
  301. {
  302. public:
  303. using char_type = char;
  304. wide_string_input_adapter(BaseInputAdapter base)
  305. : base_adapter(base) {}
  306. typename std::char_traits<char>::int_type get_character() noexcept
  307. {
  308. // check if buffer needs to be filled
  309. if (utf8_bytes_index == utf8_bytes_filled)
  310. {
  311. fill_buffer<sizeof(WideCharType)>();
  312. JSON_ASSERT(utf8_bytes_filled > 0);
  313. JSON_ASSERT(utf8_bytes_index == 0);
  314. }
  315. // use buffer
  316. JSON_ASSERT(utf8_bytes_filled > 0);
  317. JSON_ASSERT(utf8_bytes_index < utf8_bytes_filled);
  318. return utf8_bytes[utf8_bytes_index++];
  319. }
  320. // parsing binary with wchar doesn't make sense, but since the parsing mode can be runtime, we need something here
  321. template<class T>
  322. std::size_t get_elements(T* /*dest*/, std::size_t /*count*/ = 1)
  323. {
  324. JSON_THROW(parse_error::create(112, 1, "wide string type cannot be interpreted as binary data", nullptr));
  325. }
  326. private:
  327. BaseInputAdapter base_adapter;
  328. template<size_t T>
  329. void fill_buffer()
  330. {
  331. wide_string_input_helper<BaseInputAdapter, T>::fill_buffer(base_adapter, utf8_bytes, utf8_bytes_index, utf8_bytes_filled);
  332. }
  333. /// a buffer for UTF-8 bytes
  334. std::array<std::char_traits<char>::int_type, 4> utf8_bytes = {{0, 0, 0, 0}};
  335. /// index to the utf8_codes array for the next valid byte
  336. std::size_t utf8_bytes_index = 0;
  337. /// number of valid bytes in the utf8_codes array
  338. std::size_t utf8_bytes_filled = 0;
  339. };
  340. template<typename IteratorType, typename Enable = void>
  341. struct iterator_input_adapter_factory
  342. {
  343. using iterator_type = IteratorType;
  344. using char_type = typename std::iterator_traits<iterator_type>::value_type;
  345. using adapter_type = iterator_input_adapter<iterator_type>;
  346. static adapter_type create(IteratorType first, IteratorType last)
  347. {
  348. return adapter_type(std::move(first), std::move(last));
  349. }
  350. };
  351. template<typename T>
  352. struct is_iterator_of_multibyte
  353. {
  354. using value_type = typename std::iterator_traits<T>::value_type;
  355. enum
  356. {
  357. value = sizeof(value_type) > 1
  358. };
  359. };
  360. template<typename IteratorType>
  361. struct iterator_input_adapter_factory<IteratorType, enable_if_t<is_iterator_of_multibyte<IteratorType>::value>>
  362. {
  363. using iterator_type = IteratorType;
  364. using char_type = typename std::iterator_traits<iterator_type>::value_type;
  365. using base_adapter_type = iterator_input_adapter<iterator_type>;
  366. using adapter_type = wide_string_input_adapter<base_adapter_type, char_type>;
  367. static adapter_type create(IteratorType first, IteratorType last)
  368. {
  369. return adapter_type(base_adapter_type(std::move(first), std::move(last)));
  370. }
  371. };
  372. // General purpose iterator-based input
  373. template<typename IteratorType>
  374. typename iterator_input_adapter_factory<IteratorType>::adapter_type input_adapter(IteratorType first, IteratorType last)
  375. {
  376. using factory_type = iterator_input_adapter_factory<IteratorType>;
  377. return factory_type::create(first, last);
  378. }
  379. // Convenience shorthand from container to iterator
  380. // Enables ADL on begin(container) and end(container)
  381. // Encloses the using declarations in namespace for not to leak them to outside scope
  382. namespace container_input_adapter_factory_impl
  383. {
  384. using std::begin;
  385. using std::end;
  386. template<typename ContainerType, typename Enable = void>
  387. struct container_input_adapter_factory {};
  388. template<typename ContainerType>
  389. struct container_input_adapter_factory< ContainerType,
  390. void_t<decltype(begin(std::declval<ContainerType>()), end(std::declval<ContainerType>()))>>
  391. {
  392. using adapter_type = decltype(input_adapter(begin(std::declval<ContainerType>()), end(std::declval<ContainerType>())));
  393. static adapter_type create(const ContainerType& container)
  394. {
  395. return input_adapter(begin(container), end(container));
  396. }
  397. };
  398. } // namespace container_input_adapter_factory_impl
  399. template<typename ContainerType>
  400. typename container_input_adapter_factory_impl::container_input_adapter_factory<ContainerType>::adapter_type input_adapter(const ContainerType& container)
  401. {
  402. return container_input_adapter_factory_impl::container_input_adapter_factory<ContainerType>::create(container);
  403. }
// adapter type that input_adapter() yields for a std::string argument
// (a type alias, not a template specialization)
using string_input_adapter_type = decltype(input_adapter(std::declval<std::string>()));
  406. #ifndef JSON_NO_IO
  407. // Special cases with fast paths
  408. inline file_input_adapter input_adapter(std::FILE* file)
  409. {
  410. if (file == nullptr)
  411. {
  412. JSON_THROW(parse_error::create(101, 0, "attempting to parse an empty input; check that your input string or stream contains the expected JSON", nullptr));
  413. }
  414. return file_input_adapter(file);
  415. }
  416. inline input_stream_adapter input_adapter(std::istream& stream)
  417. {
  418. return input_stream_adapter(stream);
  419. }
  420. inline input_stream_adapter input_adapter(std::istream&& stream)
  421. {
  422. return input_stream_adapter(stream);
  423. }
  424. #endif // JSON_NO_IO
// adapter type that input_adapter() yields for a pair of const char* iterators
using contiguous_bytes_input_adapter = decltype(input_adapter(std::declval<const char*>(), std::declval<const char*>()));
  426. // Null-delimited strings, and the like.
  427. template < typename CharT,
  428. typename std::enable_if <
  429. std::is_pointer<CharT>::value&&
  430. !std::is_array<CharT>::value&&
  431. std::is_integral<typename std::remove_pointer<CharT>::type>::value&&
  432. sizeof(typename std::remove_pointer<CharT>::type) == 1,
  433. int >::type = 0 >
  434. contiguous_bytes_input_adapter input_adapter(CharT b)
  435. {
  436. if (b == nullptr)
  437. {
  438. JSON_THROW(parse_error::create(101, 0, "attempting to parse an empty input; check that your input string or stream contains the expected JSON", nullptr));
  439. }
  440. auto length = std::strlen(reinterpret_cast<const char*>(b));
  441. const auto* ptr = reinterpret_cast<const char*>(b);
  442. return input_adapter(ptr, ptr + length); // cppcheck-suppress[nullPointerArithmeticRedundantCheck]
  443. }
  444. template<typename T, std::size_t N>
  445. auto input_adapter(T (&array)[N]) -> decltype(input_adapter(array, array + N)) // NOLINT(cppcoreguidelines-avoid-c-arrays,hicpp-avoid-c-arrays,modernize-avoid-c-arrays)
  446. {
  447. return input_adapter(array, array + N);
  448. }
  449. // This class only handles inputs of input_buffer_adapter type.
  450. // It's required so that expressions like {ptr, len} can be implicitly cast
  451. // to the correct adapter.
  452. class span_input_adapter
  453. {
  454. public:
  455. template < typename CharT,
  456. typename std::enable_if <
  457. std::is_pointer<CharT>::value&&
  458. std::is_integral<typename std::remove_pointer<CharT>::type>::value&&
  459. sizeof(typename std::remove_pointer<CharT>::type) == 1,
  460. int >::type = 0 >
  461. span_input_adapter(CharT b, std::size_t l)
  462. : ia(reinterpret_cast<const char*>(b), reinterpret_cast<const char*>(b) + l) {}
  463. template<class IteratorType,
  464. typename std::enable_if<
  465. std::is_same<typename iterator_traits<IteratorType>::iterator_category, std::random_access_iterator_tag>::value,
  466. int>::type = 0>
  467. span_input_adapter(IteratorType first, IteratorType last)
  468. : ia(input_adapter(first, last)) {}
  469. contiguous_bytes_input_adapter&& get()
  470. {
  471. return std::move(ia); // NOLINT(hicpp-move-const-arg,performance-move-const-arg)
  472. }
  473. private:
  474. contiguous_bytes_input_adapter ia;
  475. };
  476. } // namespace detail
  477. NLOHMANN_JSON_NAMESPACE_END