Mirror of https://github.com/nlohmann/json.git
✨ added a SAX parser #971
@@ -3175,6 +3175,53 @@ class parser
        value
    };

    struct SAX
    {
        /// a null value was read
        virtual bool null() = 0;

        /// a boolean value was read
        virtual bool boolean(bool) = 0;

        /// an integer number was read
        virtual bool number_integer(number_integer_t) = 0;

        /// an unsigned integer number was read
        virtual bool number_unsigned(number_unsigned_t) = 0;

        /// a floating-point number was read
        /// the string parameter contains the raw number value
        virtual bool number_float(number_float_t, const std::string&) = 0;

        /// a string value was read
        virtual bool string(const std::string&) = 0;

        /// the beginning of an object was read
        /// binary formats may report the number of elements
        virtual bool start_object(std::size_t elements) = 0;

        /// an object key was read
        virtual bool key(const std::string&) = 0;

        /// the end of an object was read
        virtual bool end_object() = 0;

        /// the beginning of an array was read
        /// binary formats may report the number of elements
        virtual bool start_array(std::size_t elements) = 0;

        /// the end of an array was read
        virtual bool end_array() = 0;

        /// a binary value was read
        /// examples are CBOR type 2 strings, MessagePack bin, and maybe UBJSON array<uint8_t>
        virtual bool binary(const std::vector<uint8_t>& vec) = 0;

        /// a parse error occurred
        /// the byte position and the last token are reported
        virtual bool parse_error(int position, const std::string& last_token) = 0;
    };

    using parser_callback_t =
        std::function<bool(int depth, parse_event_t event, BasicJsonType& parsed)>;

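For orientation (not part of the commit): a consumer derives from this interface and returns true from each handler to keep parsing, or false to abort. A minimal sketch under those assumptions, using the illustrative name event_counter and assuming the header is consumed as <nlohmann/json.hpp> through the SAX alias added to basic_json further below:

#include <cstddef>
#include <cstdint>
#include <string>
#include <vector>
#include <nlohmann/json.hpp>

// Illustrative receiver: counts every event, aborts on the first parse error.
struct event_counter : nlohmann::json::SAX
{
    std::size_t events = 0;

    bool null() override { ++events; return true; }
    bool boolean(bool) override { ++events; return true; }
    bool number_integer(nlohmann::json::number_integer_t) override { ++events; return true; }
    bool number_unsigned(nlohmann::json::number_unsigned_t) override { ++events; return true; }
    bool number_float(nlohmann::json::number_float_t, const std::string&) override { ++events; return true; }
    bool string(const std::string&) override { ++events; return true; }
    bool start_object(std::size_t) override { ++events; return true; }
    bool key(const std::string&) override { ++events; return true; }
    bool end_object() override { ++events; return true; }
    bool start_array(std::size_t) override { ++events; return true; }
    bool end_array() override { ++events; return true; }
    bool binary(const std::vector<uint8_t>&) override { ++events; return true; }
    bool parse_error(int, const std::string&) override { return false; }  // stop parsing
};

Every bool return value is checked by sax_parse_internal below, so any handler can cut the parse short.
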
@@ -3185,6 +3232,10 @@ class parser
        : callback(cb), m_lexer(adapter), allow_exceptions(allow_exceptions_)
    {}

    parser(detail::input_adapter_t adapter, SAX* s)
        : m_lexer(adapter), sax(s)
    {}

    /*!
    @brief public parser interface

@@ -3245,6 +3296,14 @@ class parser
        return not strict or (get_token() == token_type::end_of_input);
    }

    bool sax_parse()
    {
        // read first token
        get_token();

        return sax_parse_internal();
    }

  private:
    /*!
    @brief the actual parser
@@ -3643,6 +3702,168 @@ class parser
            }
        }
    }

    bool sax_parse_internal()
    {
        switch (last_token)
        {
            case token_type::begin_object:
            {
                if (not sax->start_object(-1))
                {
                    return false;
                }

                // read next token
                get_token();

                // closing } -> we are done
                if (last_token == token_type::end_object)
                {
                    return sax->end_object();
                }

                // parse values
                while (true)
                {
                    // parse key (object keys must be strings)
                    if (last_token != token_type::value_string)
                    {
                        return sax->parse_error(m_lexer.get_position(), m_lexer.get_token_string());
                    }

                    if (not sax->key(m_lexer.move_string()))
                    {
                        return false;
                    }

                    // parse separator (:)
                    get_token();
                    if (last_token != token_type::name_separator)
                    {
                        return sax->parse_error(m_lexer.get_position(), m_lexer.get_token_string());
                    }

                    // parse value
                    get_token();
                    if (not sax_parse_internal())
                    {
                        return false;
                    }

                    // comma -> next value
                    get_token();
                    if (last_token == token_type::value_separator)
                    {
                        get_token();
                        continue;
                    }

                    // closing }
                    if (last_token == token_type::end_object)
                    {
                        return sax->end_object();
                    }
                    else
                    {
                        return sax->parse_error(m_lexer.get_position(), m_lexer.get_token_string());
                    }
                }
            }

            case token_type::begin_array:
            {
                if (not sax->start_array(-1))
                {
                    return false;
                }

                // read next token
                get_token();

                // closing ] -> we are done
                if (last_token == token_type::end_array)
                {
                    return sax->end_array();
                }

                // parse values
                while (true)
                {
                    // parse value
                    if (not sax_parse_internal())
                    {
                        return false;
                    }

                    // comma -> next value
                    get_token();
                    if (last_token == token_type::value_separator)
                    {
                        get_token();
                        continue;
                    }

                    // closing ]
                    if (last_token == token_type::end_array)
                    {
                        return sax->end_array();
                    }
                    else
                    {
                        return sax->parse_error(m_lexer.get_position(), m_lexer.get_token_string());
                    }
                }
            }

            case token_type::value_float:
            {
                const auto res = m_lexer.get_number_float();

                if (JSON_UNLIKELY(not std::isfinite(res)))
                {
                    return sax->parse_error(m_lexer.get_position(), m_lexer.get_token_string());
                }
                else
                {
                    return sax->number_float(res, m_lexer.move_string());
                }
            }

            case token_type::literal_false:
            {
                return sax->boolean(false);
            }

            case token_type::literal_null:
            {
                return sax->null();
            }

            case token_type::literal_true:
            {
                return sax->boolean(true);
            }

            case token_type::value_integer:
            {
                return sax->number_integer(m_lexer.get_number_integer());
            }

            case token_type::value_string:
            {
                return sax->string(m_lexer.move_string());
            }

            case token_type::value_unsigned:
            {
                return sax->number_unsigned(m_lexer.get_number_unsigned());
            }

            default: // the last token was unexpected
            {
                return sax->parse_error(m_lexer.get_position(), m_lexer.get_token_string());
            }
        }
    }

    /// get next token from lexer
    token_type get_token()
    {

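To make the recursion above concrete, hand-tracing it for a small input gives the following callback order (a sketch, not output captured from the library):

// Hand-traced event order for the input {"a": [1, 2]}:
//   start_object(-1)
//   key("a")
//   start_array(-1)
//   number_unsigned(1)
//   number_unsigned(2)
//   end_array()
//   end_object()
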
@@ -3707,6 +3928,8 @@ class parser
    token_type expected = token_type::uninitialized;
    /// whether to throw exceptions in case of errors
    const bool allow_exceptions = true;
    /// associated SAX parse event receiver
    SAX* sax = nullptr;
};
}
}

@@ -10652,6 +10875,8 @@ class basic_json
    */
    using parse_event_t = typename parser::parse_event_t;

    using SAX = typename parser::SAX;

    /*!
    @brief per-element parser callback type

@@ -15523,6 +15748,16 @@ class basic_json
        return parser(i).accept(true);
    }

    static bool sax_parse(detail::input_adapter i, SAX* sax)
    {
        return parser(i, sax).sax_parse();
    }

    static bool sax_parse(detail::input_adapter& i, SAX* sax)
    {
        return parser(i, sax).sax_parse();
    }

    /*!
    @brief deserialize from an iterator range with contiguous storage

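A usage sketch for the two overloads above (again not part of the commit). It reuses the illustrative event_counter from the earlier sketch and assumes that a std::string converts to detail::input_adapter exactly as it does for the existing parse() and accept() overloads:

#include <iostream>
#include <string>

int main()
{
    event_counter counter;  // illustrative receiver from the earlier sketch
    const std::string text = R"({"list": [1, 2, 3], "flag": true})";

    // assumed to compile like json::parse(text): std::string -> detail::input_adapter
    const bool ok = nlohmann::json::sax_parse(text, &counter);
    std::cout << (ok ? "accepted" : "rejected")
              << " after " << counter.events << " events\n";
    return 0;
}
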
@@ -15592,6 +15827,15 @@ class basic_json
        return parser(detail::input_adapter(first, last)).accept(true);
    }

    template<class IteratorType, typename std::enable_if<
                 std::is_base_of<
                     std::random_access_iterator_tag,
                     typename std::iterator_traits<IteratorType>::iterator_category>::value, int>::type = 0>
    static bool sax_parse(IteratorType first, IteratorType last, SAX* sax)
    {
        return parser(detail::input_adapter(first, last), sax).sax_parse();
    }

    /*!
    @brief deserialize from stream
    @deprecated This stream operator is deprecated and will be removed in
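The iterator-range overload in this last hunk can be driven the same way; a small sketch reusing the illustrative event_counter, where std::string iterators already satisfy the random-access requirement:

// Sketch only: any random-access range works, e.g. a std::string.
static bool count_events_in(const std::string& text, event_counter& counter)
{
    return nlohmann::json::sax_parse(text.begin(), text.end(), &counter);
}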