7 #ifndef PARSER_H_81248670213764583021432
8 #define PARSER_H_81248670213764583021432
12 #include <zen/string_tools.h>
32 const std::string& lineBreak =
"\r\n",
33 const std::string& indent =
" ");
76 namespace implementation
78 template <
class Predicate>
inline
79 std::string normalize(
const std::string& str, Predicate pred)
82 for (
const char c : str)
99 const auto hexDigits = hexify(c);
100 output += hexDigits.first;
101 output += hexDigits.second;
112 std::string normalizeName(
const std::string& str)
114 return normalize(str, [](
char c) {
return isWhiteSpace(c) || c ==
'=' || c ==
'/' || c ==
'\'' || c ==
'\"'; });
118 std::string normalizeElementValue(
const std::string& str)
120 return normalize(str, [](
char c) {
return static_cast<unsigned char>(c) < 32; });
124 std::string normalizeAttribValue(
const std::string& str)
126 return normalize(str, [](
char c) {
return static_cast<unsigned char>(c) < 32 || c ==
'\'' || c ==
'\"'; });
130 template <
class CharIterator,
size_t N>
inline
131 bool checkEntity(CharIterator& first, CharIterator last,
const char (&placeholder)[N])
133 assert(placeholder[N - 1] == 0);
134 const ptrdiff_t strLen = N - 1;
135 if (last - first >= strLen && std::equal(first, first + strLen, placeholder))
146 std::string denormalize(
const std::string& str)
149 for (
auto it = str.begin(); it != str.end(); ++it)
155 if (checkEntity(it, str.end(),
"&"))
157 else if (checkEntity(it, str.end(),
"<"))
159 else if (checkEntity(it, str.end(),
">"))
161 else if (checkEntity(it, str.end(),
"'"))
163 else if (checkEntity(it, str.end(),
"""))
165 else if (str.end() - it >= 6 &&
170 output += unhexify(it[3], it[4]);
178 auto itNext = it + 1;
179 if (itNext != str.end() && *itNext ==
'\n')
190 void serialize(
const XmlElement& element, std::string& stream,
191 const std::string& lineBreak,
192 const std::string& indent,
195 const std::string& nameFmt = normalizeName(element.getNameAs<std::string>());
197 for (
size_t i = 0; i < indentLevel; ++i)
200 stream +=
'<' + nameFmt;
202 auto attr = element.getAttributes();
203 for (
auto it = attr.first; it != attr.second; ++it)
204 stream +=
' ' + normalizeName(it->first) +
"=\"" + normalizeAttribValue(it->second) +
'\"';
207 auto iterPair = element.getChildren();
208 if (iterPair.first != iterPair.second)
210 stream +=
'>' + lineBreak;
212 std::for_each(iterPair.first, iterPair.second,
213 [&](
const XmlElement& el) { serialize(el, stream, lineBreak, indent, indentLevel + 1); });
215 for (
size_t i = 0; i < indentLevel; ++i)
217 stream +=
"</" + nameFmt +
'>' + lineBreak;
222 element.getValue(value);
225 stream +=
'>' + normalizeElementValue(value) +
"</" + nameFmt +
'>' + lineBreak;
227 stream +=
"/>" + lineBreak;
232 const std::string& lineBreak,
233 const std::string& indent)
235 std::string version = doc.getVersionAs<std::string>();
236 if (!version.empty())
237 version =
" version=\"" + normalizeAttribValue(version) +
'\"';
239 std::string encoding = doc.getEncodingAs<std::string>();
240 if (!encoding.empty())
241 encoding =
" encoding=\"" + normalizeAttribValue(encoding) +
'\"';
243 std::string standalone = doc.getStandaloneAs<std::string>();
244 if (!standalone.empty())
245 standalone =
" standalone=\"" + normalizeAttribValue(standalone) +
'\"';
247 std::string output =
"<?xml" + version + encoding + standalone +
"?>" + lineBreak;
248 serialize(doc.root(), output, lineBreak, indent, 0);
256 const std::string& lineBreak,
257 const std::string& indent) {
return implementation::serialize(doc, lineBreak, indent); }
283 namespace implementation
301 Token(Type t) : type(t) {}
302 Token(
const std::string& txt) : type(TK_NAME), name(txt) {}
311 Scanner(
const std::string& stream) : stream_(stream), pos(stream_.begin())
313 if (zen::startsWith(stream_, BYTE_ORDER_MARK_UTF8))
314 pos += strLength(BYTE_ORDER_MARK_UTF8);
320 pos = std::find_if(pos, stream_.end(), [](
char c) {
return !zen::isWhiteSpace(c); });
322 if (pos == stream_.end())
323 return Token::TK_END;
326 if (startsWith(xmlCommentBegin))
328 auto it = std::search(pos + xmlCommentBegin.size(), stream_.end(), xmlCommentEnd.begin(), xmlCommentEnd.end());
329 if (it != stream_.end())
331 pos = it + xmlCommentEnd.size();
336 for (
auto it = tokens.begin(); it != tokens.end(); ++it)
337 if (startsWith(it->first))
339 pos += it->first.size();
343 auto nameEnd = std::find_if(pos, stream_.end(), [](
char c)
351 zen::isWhiteSpace(c);
356 std::string name(&*pos, nameEnd - pos);
358 return implementation::denormalize(name);
362 throw XmlParsingError(posRow(), posCol());
365 std::string extractElementValue()
367 auto it = std::find_if(pos, stream_.end(), [](
char c)
372 std::string output(pos, it);
374 return implementation::denormalize(output);
377 std::string extractAttributeValue()
379 auto it = std::find_if(pos, stream_.end(), [](
char c)
386 std::string output(pos, it);
388 return implementation::denormalize(output);
391 size_t posRow() const
393 const size_t crSum = std::count(stream_.begin(), pos,
'\r');
394 const size_t nlSum = std::count(stream_.begin(), pos,
'\n');
395 assert(crSum == 0 || nlSum == 0 || crSum == nlSum);
396 return std::max(crSum, nlSum);
399 size_t posCol() const
402 for (
auto it = pos; it != stream_.begin(); )
405 if (*it ==
'\r' || *it ==
'\n')
408 return pos - stream_.begin();
412 Scanner (
const Scanner&) =
delete;
413 Scanner& operator=(
const Scanner&) =
delete;
415 bool startsWith(
const std::string& prefix)
const
417 if (stream_.end() - pos < static_cast<ptrdiff_t>(prefix.size()))
419 return std::equal(prefix.begin(), prefix.end(), pos);
422 using TokenList = std::vector<std::pair<std::string, Token::Type>>;
423 const TokenList tokens
425 {
"<?xml", Token::TK_DECL_BEGIN },
426 {
"?>", Token::TK_DECL_END },
427 {
"</", Token::TK_LESS_SLASH },
428 {
"/>", Token::TK_SLASH_GREATER },
429 {
"<" , Token::TK_LESS },
430 {
">" , Token::TK_GREATER },
431 {
"=" , Token::TK_EQUAL },
432 {
"\"", Token::TK_QUOTE },
433 {
"\'", Token::TK_QUOTE },
436 const std::string xmlCommentBegin =
"<!--";
437 const std::string xmlCommentEnd =
"-->";
439 const std::string stream_;
440 std::string::const_iterator pos;
447 XmlParser(
const std::string& stream) :
449 tk(scn.nextToken()) {}
456 if (token().type == Token::TK_DECL_BEGIN)
460 while (token().type == Token::TK_NAME)
462 std::string attribName = token().name;
465 consumeToken(Token::TK_EQUAL);
466 expectToken(Token::TK_QUOTE);
467 std::string attribValue = scn.extractAttributeValue();
470 consumeToken(Token::TK_QUOTE);
472 if (attribName ==
"version")
473 doc.setVersion(attribValue);
474 else if (attribName ==
"encoding")
475 doc.setEncoding(attribValue);
476 else if (attribName ==
"standalone")
477 doc.setStandalone(attribValue);
479 consumeToken(Token::TK_DECL_END);
483 parseChildElements(dummy);
485 auto itPair = dummy.getChildren();
486 if (itPair.first != itPair.second)
487 doc.root().swapSubtree(*itPair.first);
489 expectToken(Token::TK_END);
494 XmlParser (
const XmlParser&) =
delete;
495 XmlParser& operator=(
const XmlParser&) =
delete;
497 void parseChildElements(XmlElement& parent)
499 while (token().type == Token::TK_LESS)
503 expectToken(Token::TK_NAME);
504 std::string elementName = token().name;
507 XmlElement& newElement = parent.addChild(elementName);
509 parseAttributes(newElement);
511 if (token().type == Token::TK_SLASH_GREATER)
517 expectToken(Token::TK_GREATER);
518 std::string elementValue = scn.extractElementValue();
522 if (token().type == Token::TK_LESS)
523 parseChildElements(newElement);
525 newElement.setValue(elementValue);
527 consumeToken(Token::TK_LESS_SLASH);
529 if (token().type != Token::TK_NAME ||
530 elementName != token().name)
531 throw XmlParsingError(scn.posRow(), scn.posCol());
534 consumeToken(Token::TK_GREATER);
538 void parseAttributes(XmlElement& element)
540 while (token().type == Token::TK_NAME)
542 std::string attribName = token().name;
545 consumeToken(Token::TK_EQUAL);
546 expectToken(Token::TK_QUOTE);
547 std::string attribValue = scn.extractAttributeValue();
550 consumeToken(Token::TK_QUOTE);
551 element.setAttribute(attribName, attribValue);
555 const Token& token()
const {
return tk; }
556 void nextToken() { tk = scn.nextToken(); }
558 void consumeToken(Token::Type t)
564 void expectToken(Token::Type t)
566 if (token().type != t)
567 throw XmlParsingError(scn.posRow(), scn.posCol());
578 return implementation::XmlParser(stream).parse();
582 #endif //PARSER_H_81248670213764583021432
XmlDoc parse(const std::string &stream)
Load XML document from a byte stream.
Definition: parser.h:576
std::string serialize(const XmlDoc &doc, const std::string &lineBreak="\r\n", const std::string &indent=" ")
Save XML document as a byte stream.
Definition: parser.h:255
The zen::Xml namespace.
Definition: bind.h:15
The complete XML document.
Definition: dom.h:246
const size_t row
Input file row where the parsing error occurred (zero-based)
Definition: parser.h:40
Exception thrown due to an XML parsing error.
Definition: parser.h:36
Exception base class for zen::Xml.
Definition: error.h:13
const size_t col
Input file column where the parsing error occurred (zero-based)
Definition: parser.h:42