mirror of
https://github.com/nlohmann/json.git
synced 2025-07-28 12:02:00 +03:00
@ -617,6 +617,36 @@ struct is_compatible_type
|
||||
#include <stdexcept> // runtime_error
|
||||
#include <string> // to_string
|
||||
|
||||
// #include <nlohmann/detail/input/position_t.hpp>
|
||||
|
||||
|
||||
#include <cstddef> // size_t
|
||||
|
||||
namespace nlohmann
|
||||
{
|
||||
namespace detail
|
||||
{
|
||||
/// struct to capture the start position of the current token
struct position_t
{
    /// the total number of characters read
    std::size_t chars_read_total = 0;
    /// the number of characters read in the current line
    std::size_t chars_read_current_line = 0;
    /// the number of lines read
    std::size_t lines_read = 0;

    /*!
    @brief conversion to size_t to preserve SAX interface

    @return the total number of characters read (chars_read_total)

    @note The target type is spelled std::size_t, like the members above:
          <cstddef> is only guaranteed to declare the name in namespace std.
    */
    constexpr operator std::size_t() const
    {
        return chars_read_total;
    }
};
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
namespace nlohmann
|
||||
{
|
||||
namespace detail
|
||||
@ -727,15 +757,23 @@ class parse_error : public exception
|
||||
/*!
|
||||
@brief create a parse error exception
|
||||
@param[in] id_ the id of the exception
|
||||
@param[in] byte_ the byte index where the error occurred (or 0 if the
|
||||
position cannot be determined)
|
||||
@param[in] position the position where the error occurred (or with
|
||||
chars_read_total=0 if the position cannot be
|
||||
determined)
|
||||
@param[in] what_arg the explanatory string
|
||||
@return parse_error object
|
||||
*/
|
||||
static parse_error create(int id_, const position_t& pos, const std::string& what_arg)
|
||||
{
|
||||
std::string w = exception::name("parse_error", id_) + "parse error" +
|
||||
position_string(pos) + ": " + what_arg;
|
||||
return parse_error(id_, pos.chars_read_total, w.c_str());
|
||||
}
|
||||
|
||||
/*!
@brief create a parse error exception from a byte offset
@param[in] id_      the id of the exception
@param[in] byte_    the byte index where the error occurred (or 0 if the
                    position cannot be determined)
@param[in] what_arg the explanatory string
@return parse_error object
*/
static parse_error create(int id_, std::size_t byte_, const std::string& what_arg)
{
    // mention the position exactly once, and only when it is known (byte_ != 0)
    std::string w = exception::name("parse_error", id_) + "parse error" +
                    (byte_ != 0 ? (" at byte " + std::to_string(byte_)) : "") +
                    ": " + what_arg;
    return parse_error(id_, byte_, w.c_str());
}
|
||||
@ -754,6 +792,17 @@ class parse_error : public exception
|
||||
private:
|
||||
parse_error(int id_, std::size_t byte_, const char* what_arg)
|
||||
: exception(id_, what_arg), byte(byte_) {}
|
||||
|
||||
static std::string position_string(const position_t& pos)
|
||||
{
|
||||
if (pos.chars_read_total == 0)
|
||||
{
|
||||
return "";
|
||||
}
|
||||
|
||||
return " at line " + std::to_string(pos.lines_read + 1) +
|
||||
", column " + std::to_string(pos.chars_read_current_line);
|
||||
}
|
||||
};
|
||||
|
||||
/*!
|
||||
@ -2277,6 +2326,8 @@ class input_adapter
|
||||
|
||||
// #include <nlohmann/detail/input/input_adapters.hpp>
|
||||
|
||||
// #include <nlohmann/detail/input/position_t.hpp>
|
||||
|
||||
|
||||
namespace nlohmann
|
||||
{
|
||||
@ -2660,39 +2711,194 @@ class lexer
|
||||
|
||||
// invalid control characters
|
||||
case 0x00:
|
||||
{
|
||||
error_message = "invalid string: control character U+0000 (NUL) must be escaped to \\u0000";
|
||||
return token_type::parse_error;
|
||||
}
|
||||
|
||||
case 0x01:
|
||||
{
|
||||
error_message = "invalid string: control character U+0001 (SOH) must be escaped to \\u0001";
|
||||
return token_type::parse_error;
|
||||
}
|
||||
|
||||
case 0x02:
|
||||
{
|
||||
error_message = "invalid string: control character U+0002 (STX) must be escaped to \\u0002";
|
||||
return token_type::parse_error;
|
||||
}
|
||||
|
||||
case 0x03:
|
||||
{
|
||||
error_message = "invalid string: control character U+0003 (ETX) must be escaped to \\u0003";
|
||||
return token_type::parse_error;
|
||||
}
|
||||
|
||||
case 0x04:
|
||||
{
|
||||
error_message = "invalid string: control character U+0004 (EOT) must be escaped to \\u0004";
|
||||
return token_type::parse_error;
|
||||
}
|
||||
|
||||
case 0x05:
|
||||
{
|
||||
error_message = "invalid string: control character U+0005 (ENQ) must be escaped to \\u0005";
|
||||
return token_type::parse_error;
|
||||
}
|
||||
|
||||
case 0x06:
|
||||
{
|
||||
error_message = "invalid string: control character U+0006 (ACK) must be escaped to \\u0006";
|
||||
return token_type::parse_error;
|
||||
}
|
||||
|
||||
case 0x07:
|
||||
{
|
||||
error_message = "invalid string: control character U+0007 (BEL) must be escaped to \\u0007";
|
||||
return token_type::parse_error;
|
||||
}
|
||||
|
||||
case 0x08:
|
||||
{
|
||||
error_message = "invalid string: control character U+0008 (BS) must be escaped to \\u0008 or \\b";
|
||||
return token_type::parse_error;
|
||||
}
|
||||
|
||||
case 0x09:
|
||||
{
|
||||
error_message = "invalid string: control character U+0009 (HT) must be escaped to \\u0009 or \\t";
|
||||
return token_type::parse_error;
|
||||
}
|
||||
|
||||
case 0x0A:
|
||||
{
|
||||
error_message = "invalid string: control character U+000A (LF) must be escaped to \\u000A or \\n";
|
||||
return token_type::parse_error;
|
||||
}
|
||||
|
||||
case 0x0B:
|
||||
{
|
||||
error_message = "invalid string: control character U+000B (VT) must be escaped to \\u000B";
|
||||
return token_type::parse_error;
|
||||
}
|
||||
|
||||
case 0x0C:
|
||||
{
|
||||
error_message = "invalid string: control character U+000C (FF) must be escaped to \\u000C or \\f";
|
||||
return token_type::parse_error;
|
||||
}
|
||||
|
||||
case 0x0D:
|
||||
{
|
||||
error_message = "invalid string: control character U+000D (CR) must be escaped to \\u000D or \\r";
|
||||
return token_type::parse_error;
|
||||
}
|
||||
|
||||
case 0x0E:
|
||||
{
|
||||
error_message = "invalid string: control character U+000E (SO) must be escaped to \\u000E";
|
||||
return token_type::parse_error;
|
||||
}
|
||||
|
||||
case 0x0F:
|
||||
{
|
||||
error_message = "invalid string: control character U+000F (SI) must be escaped to \\u000F";
|
||||
return token_type::parse_error;
|
||||
}
|
||||
|
||||
case 0x10:
|
||||
{
|
||||
error_message = "invalid string: control character U+0010 (DLE) must be escaped to \\u0010";
|
||||
return token_type::parse_error;
|
||||
}
|
||||
|
||||
case 0x11:
|
||||
{
|
||||
error_message = "invalid string: control character U+0011 (DC1) must be escaped to \\u0011";
|
||||
return token_type::parse_error;
|
||||
}
|
||||
|
||||
case 0x12:
|
||||
{
|
||||
error_message = "invalid string: control character U+0012 (DC2) must be escaped to \\u0012";
|
||||
return token_type::parse_error;
|
||||
}
|
||||
|
||||
case 0x13:
|
||||
{
|
||||
error_message = "invalid string: control character U+0013 (DC3) must be escaped to \\u0013";
|
||||
return token_type::parse_error;
|
||||
}
|
||||
|
||||
case 0x14:
|
||||
{
|
||||
error_message = "invalid string: control character U+0014 (DC4) must be escaped to \\u0014";
|
||||
return token_type::parse_error;
|
||||
}
|
||||
|
||||
case 0x15:
|
||||
{
|
||||
error_message = "invalid string: control character U+0015 (NAK) must be escaped to \\u0015";
|
||||
return token_type::parse_error;
|
||||
}
|
||||
|
||||
case 0x16:
|
||||
{
|
||||
error_message = "invalid string: control character U+0016 (SYN) must be escaped to \\u0016";
|
||||
return token_type::parse_error;
|
||||
}
|
||||
|
||||
case 0x17:
|
||||
{
|
||||
error_message = "invalid string: control character U+0017 (ETB) must be escaped to \\u0017";
|
||||
return token_type::parse_error;
|
||||
}
|
||||
|
||||
case 0x18:
|
||||
{
|
||||
error_message = "invalid string: control character U+0018 (CAN) must be escaped to \\u0018";
|
||||
return token_type::parse_error;
|
||||
}
|
||||
|
||||
case 0x19:
|
||||
{
|
||||
error_message = "invalid string: control character U+0019 (EM) must be escaped to \\u0019";
|
||||
return token_type::parse_error;
|
||||
}
|
||||
|
||||
case 0x1A:
|
||||
{
|
||||
error_message = "invalid string: control character U+001A (SUB) must be escaped to \\u001A";
|
||||
return token_type::parse_error;
|
||||
}
|
||||
|
||||
case 0x1B:
|
||||
{
|
||||
error_message = "invalid string: control character U+001B (ESC) must be escaped to \\u001B";
|
||||
return token_type::parse_error;
|
||||
}
|
||||
|
||||
case 0x1C:
|
||||
{
|
||||
error_message = "invalid string: control character U+001C (FS) must be escaped to \\u001C";
|
||||
return token_type::parse_error;
|
||||
}
|
||||
|
||||
case 0x1D:
|
||||
{
|
||||
error_message = "invalid string: control character U+001D (GS) must be escaped to \\u001D";
|
||||
return token_type::parse_error;
|
||||
}
|
||||
|
||||
case 0x1E:
|
||||
{
|
||||
error_message = "invalid string: control character U+001E (RS) must be escaped to \\u001E";
|
||||
return token_type::parse_error;
|
||||
}
|
||||
|
||||
case 0x1F:
{
    // name the offending control character and its escape sequence
    error_message = "invalid string: control character U+001F (US) must be escaped to \\u001F";
    return token_type::parse_error;
}
|
||||
|
||||
@ -3349,7 +3555,9 @@ scan_number_done:
|
||||
*/
|
||||
std::char_traits<char>::int_type get()
|
||||
{
|
||||
++chars_read;
|
||||
++position.chars_read_total;
|
||||
++position.chars_read_current_line;
|
||||
|
||||
if (next_unget)
|
||||
{
|
||||
// just reset the next_unget variable and work with current
|
||||
@ -3364,6 +3572,13 @@ scan_number_done:
|
||||
{
|
||||
token_string.push_back(std::char_traits<char>::to_char_type(current));
|
||||
}
|
||||
|
||||
// a newline ends the current line: count it and restart the column counter
if (current == '\n')
{
    ++position.lines_read;
    position.chars_read_current_line = 0;
}
|
||||
|
||||
return current;
|
||||
}
|
||||
|
||||
@ -3371,14 +3586,23 @@ scan_number_done:
|
||||
@brief unget current character (read it again on next get)
|
||||
|
||||
We implement unget by setting variable next_unget to true. The input is not
changed - we just simulate ungetting by modifying chars_read_total,
chars_read_current_line, and token_string. The next call to get() will
behave as if the unget character is read again.
|
||||
*/
|
||||
void unget()
|
||||
{
|
||||
next_unget = true;
|
||||
--chars_read;
|
||||
|
||||
--position.chars_read_total;
|
||||
--position.chars_read_current_line;
|
||||
|
||||
// in case we "unget" a newline, we have to also decrement the lines_read
|
||||
if (position.lines_read != 0 and position.chars_read_current_line == 0)
|
||||
{
|
||||
--position.lines_read;
|
||||
}
|
||||
|
||||
if (JSON_LIKELY(current != std::char_traits<char>::eof()))
|
||||
{
|
||||
assert(token_string.size() != 0);
|
||||
@ -3426,9 +3650,9 @@ scan_number_done:
|
||||
/////////////////////
|
||||
|
||||
/// return position of last read token
|
||||
constexpr std::size_t get_position() const noexcept
|
||||
constexpr position_t get_position() const noexcept
|
||||
{
|
||||
return chars_read;
|
||||
return position;
|
||||
}
|
||||
|
||||
/// return the last read token (for errors only). Will never contain EOF
|
||||
@ -3498,7 +3722,7 @@ scan_number_done:
|
||||
token_type scan()
|
||||
{
|
||||
// initially, skip the BOM
|
||||
if (chars_read == 0 and not skip_bom())
|
||||
if (position.chars_read_total == 0 and not skip_bom())
|
||||
{
|
||||
error_message = "invalid BOM; must be 0xEF 0xBB 0xBF if given";
|
||||
return token_type::parse_error;
|
||||
@ -3576,8 +3800,8 @@ scan_number_done:
|
||||
/// whether the next get() call should just return current
|
||||
bool next_unget = false;
|
||||
|
||||
/// the number of characters read
|
||||
std::size_t chars_read = 0;
|
||||
/// the start position of the current token
|
||||
position_t position;
|
||||
|
||||
/// raw input token string (for error messages)
|
||||
std::vector<char> token_string {};
|
||||
|
Reference in New Issue
Block a user