JsonCpp project page Classes Namespace JsonCpp home page

json_reader.cpp
Go to the documentation of this file.
1 // Copyright 2007-2011 Baptiste Lepilleur and The JsonCpp Authors
2 // Copyright (C) 2016 InfoTeCS JSC. All rights reserved.
3 // Distributed under MIT license, or public domain if desired and
4 // recognized in your jurisdiction.
5 // See file LICENSE for detail or copy at http://jsoncpp.sourceforge.net/LICENSE
6 
7 #if !defined(JSON_IS_AMALGAMATION)
8 #include "json_tool.h"
9 #include <json/assertions.h>
10 #include <json/reader.h>
11 #include <json/value.h>
12 #endif // if !defined(JSON_IS_AMALGAMATION)
13 #include <algorithm>
14 #include <cassert>
15 #include <cmath>
16 #include <cstring>
17 #include <iostream>
18 #include <istream>
19 #include <iterator>
20 #include <limits>
21 #include <memory>
22 #include <set>
23 #include <sstream>
24 #include <utility>
25 
26 #include <cstdio>
27 
28 #if defined(_MSC_VER)
29 #if !defined(_CRT_SECURE_CPP_OVERLOAD_STANDARD_NAMES)
30 #define _CRT_SECURE_CPP_OVERLOAD_STANDARD_NAMES 1
31 #endif //_CRT_SECURE_CPP_OVERLOAD_STANDARD_NAMES
32 #endif //_MSC_VER
33 
34 #if defined(_MSC_VER)
35 // Disable warning about strdup being deprecated.
36 #pragma warning(disable : 4996)
37 #endif
38 
39 // Define JSONCPP_DEPRECATED_STACK_LIMIT as an appropriate integer at compile
40 // time to change the stack limit
41 #if !defined(JSONCPP_DEPRECATED_STACK_LIMIT)
42 #define JSONCPP_DEPRECATED_STACK_LIMIT 256
43 #endif
44 
// Nesting limit for the deprecated Reader: Reader::readValue() compares
// nodes_.size() against this value. It is 256 unless
// JSONCPP_DEPRECATED_STACK_LIMIT was defined at compile time.
45 static size_t const stackLimit_g =
46  JSONCPP_DEPRECATED_STACK_LIMIT; // see readValue()
47 
48 namespace Json {
49 
50 using CharReaderPtr = std::unique_ptr<CharReader>;
51 
52 // Implementation of class Features
53 // ////////////////////////////////
54 
// Compiler-generated default constructor. Member defaults come from the class
// declaration, which is not part of this file.
55 Features::Features() = default;
56 
// Returns a default-constructed Features object.
57 Features Features::all() { return {}; }
58 
// Returns a Features configuration for strict parsing: comments, dropped null
// placeholders and numeric object keys are turned off, and strictRoot_ is set
// so that Reader::parse() rejects a root that is not an array or an object.
59 Features Features::strictMode() {
60  Features features;
61  features.allowComments_ = false;
62  features.strictRoot_ = true;
63  features.allowDroppedNullPlaceholders_ = false;
64  features.allowNumericKeys_ = false;
65  return features;
66 }
67 
68 // Implementation of class Reader
69 // ////////////////////////////////
70 
// Returns true if the half-open range [begin, end) contains a '\n' or a '\r'.
71 bool Reader::containsNewLine(Reader::Location begin, Reader::Location end) {
72  return std::any_of(begin, end, [](char b) { return b == '\n' || b == '\r'; });
73 }
74 
75 // Class Reader
76 // //////////////////////////////////////////////////////////////////
77 
// Default constructor: the reader uses the feature set from Features::all().
78 Reader::Reader() : features_(Features::all()) {}
79 
// Constructs a reader that parses according to a copy of `features`.
80 Reader::Reader(const Features& features) : features_(features) {}
81 
// Parses a document held in a std::string.
// The text is copied into document_ first, and the pointer-range overload is
// then run over that copy, so the locations stored during parsing point into
// memory owned by this reader rather than into the caller's string.
// Returns whatever the pointer-range overload returns.
82 bool Reader::parse(const std::string& document, Value& root,
83  bool collectComments) {
84  document_.assign(document.begin(), document.end());
85  const char* begin = document_.c_str();
86  const char* end = begin + document_.length();
87  return parse(begin, end, root, collectComments);
88 }
89 
// Parses a document read from a stream.
// Everything remaining in `is` is read into document_ before parsing starts,
// then the pointer-range overload is run over that buffer.
90 bool Reader::parse(std::istream& is, Value& root, bool collectComments) {
91  document_.assign(std::istreambuf_iterator<char>(is),
92  std::istreambuf_iterator<char>());
93  return parse(document_.data(), document_.data() + document_.size(), root,
94  collectComments);
95 }
96 
// Parses the character range [beginDoc, endDoc) into `root`.
//   beginDoc, endDoc  bounds of the document text; stored in begin_/end_, so
//                     the range must stay valid while locations are in use.
//   root              receives the parsed value.
//   collectComments   requests comment collection; forced to false when
//                     features_.allowComments_ is off.
// Returns the result of readValue(), except that with features_.strictRoot_
// set, a root that is neither an array nor an object records an error and
// yields false.
97 bool Reader::parse(const char* beginDoc, const char* endDoc, Value& root,
98  bool collectComments) {
99  if (!features_.allowComments_) {
100  collectComments = false;
101  }
102 
    // Reset all per-parse state so the same Reader can be reused.
103  begin_ = beginDoc;
104  end_ = endDoc;
105  collectComments_ = collectComments;
106  current_ = begin_;
107  lastValueEnd_ = nullptr;
108  lastValue_ = nullptr;
109  commentsBefore_.clear();
110  errors_.clear();
111  while (!nodes_.empty())
112  nodes_.pop();
113  nodes_.push(&root);
114 
115  bool successful = readValue();
    // Read one token past the root value so that comments following it are
    // accumulated in commentsBefore_ and can be attached to root below.
116  Token token;
117  readTokenSkippingComments(token);
118  if (collectComments_ && !commentsBefore_.empty())
119  root.setComment(commentsBefore_, commentAfter);
120  if (features_.strictRoot_) {
121  if (!root.isArray() && !root.isObject()) {
122  // Set error location to start of doc, ideally should be first token found
123  // in doc
124  token.type_ = tokenError;
125  token.start_ = beginDoc;
126  token.end_ = endDoc;
127  addError(
128  "A valid JSON document must be either an array or an object value.",
129  token);
130  return false;
131  }
132  }
133  return successful;
134 }
135 
136 bool Reader::readValue() {
137  // readValue() may call itself only if it calls readObject() or ReadArray().
138  // These methods execute nodes_.push() just before and nodes_.pop)() just
139  // after calling readValue(). parse() executes one nodes_.push(), so > instead
140  // of >=.
141  if (nodes_.size() > stackLimit_g)
143  throwRuntimeError("Exceeded stackLimit in readValue().");
144 #else
145  // throwRuntimeError aborts. Don't abort here.
146  return false;
147 #endif
148 
149  Token token;
150  readTokenSkippingComments(token);
151  bool successful = true;
152 
153  if (collectComments_ && !commentsBefore_.empty()) {
154  currentValue().setComment(commentsBefore_, commentBefore);
155  commentsBefore_.clear();
156  }
157 
158  switch (token.type_) {
159  case tokenObjectBegin:
160  successful = readObject(token);
161  currentValue().setOffsetLimit(current_ - begin_);
162  break;
163  case tokenArrayBegin:
164  successful = readArray(token);
165  currentValue().setOffsetLimit(current_ - begin_);
166  break;
167  case tokenNumber:
168  successful = decodeNumber(token);
169  break;
170  case tokenString:
171  successful = decodeString(token);
172  break;
173  case tokenTrue: {
174  Value v(true);
175  currentValue().swapPayload(v);
176  currentValue().setOffsetStart(token.start_ - begin_);
177  currentValue().setOffsetLimit(token.end_ - begin_);
178  } break;
179  case tokenFalse: {
180  Value v(false);
181  currentValue().swapPayload(v);
182  currentValue().setOffsetStart(token.start_ - begin_);
183  currentValue().setOffsetLimit(token.end_ - begin_);
184  } break;
185  case tokenNull: {
186  Value v;
187  currentValue().swapPayload(v);
188  currentValue().setOffsetStart(token.start_ - begin_);
189  currentValue().setOffsetLimit(token.end_ - begin_);
190  } break;
191  case tokenArraySeparator:
192  case tokenObjectEnd:
193  case tokenArrayEnd:
194  if (features_.allowDroppedNullPlaceholders_) {
195  // "Un-read" the current token and mark the current value as a null
196  // token.
197  current_--;
198  Value v;
199  currentValue().swapPayload(v);
200  currentValue().setOffsetStart(current_ - begin_ - 1);
201  currentValue().setOffsetLimit(current_ - begin_);
202  break;
203  } // Else, fall through...
204  default:
205  currentValue().setOffsetStart(token.start_ - begin_);
206  currentValue().setOffsetLimit(token.end_ - begin_);
207  return addError("Syntax error: value, object or array expected.", token);
208  }
209 
210  if (collectComments_) {
211  lastValueEnd_ = current_;
212  lastValue_ = &currentValue();
213  }
214 
215  return successful;
216 }
217 
// Reads the next token into `token`. When comments are allowed, comment
// tokens are skipped by reading again until a non-comment token (or a
// failure) is seen. Returns the result of the last readToken() call.
218 bool Reader::readTokenSkippingComments(Token& token) {
219  bool success = readToken(token);
220  if (features_.allowComments_) {
221  while (success && token.type_ == tokenComment) {
222  success = readToken(token);
223  }
224  }
225  return success;
226 }
227 
// Skips whitespace, then classifies and consumes the next token based on its
// first character. On return token.start_/token.end_ bracket the consumed
// text.
// Returns false and sets token.type_ to tokenError when the character cannot
// start a token or when the string, comment or literal it starts is malformed.
228 bool Reader::readToken(Token& token) {
229  skipSpaces();
230  token.start_ = current_;
231  Char c = getNextChar();
232  bool ok = true;
233  switch (c) {
234  case '{':
235  token.type_ = tokenObjectBegin;
236  break;
237  case '}':
238  token.type_ = tokenObjectEnd;
239  break;
240  case '[':
241  token.type_ = tokenArrayBegin;
242  break;
243  case ']':
244  token.type_ = tokenArrayEnd;
245  break;
246  case '"':
247  token.type_ = tokenString;
248  ok = readString();
249  break;
250  case '/':
251  token.type_ = tokenComment;
252  ok = readComment();
253  break;
254  case '0':
255  case '1':
256  case '2':
257  case '3':
258  case '4':
259  case '5':
260  case '6':
261  case '7':
262  case '8':
263  case '9':
264  case '-':
    // readNumber() only delimits the token; the text is validated later by
    // decodeNumber()/decodeDouble().
265  token.type_ = tokenNumber;
266  readNumber();
267  break;
268  case 't':
269  token.type_ = tokenTrue;
270  ok = match("rue", 3);
271  break;
272  case 'f':
273  token.type_ = tokenFalse;
274  ok = match("alse", 4);
275  break;
276  case 'n':
277  token.type_ = tokenNull;
278  ok = match("ull", 3);
279  break;
280  case ',':
281  token.type_ = tokenArraySeparator;
282  break;
283  case ':':
284  token.type_ = tokenMemberSeparator;
285  break;
    // getNextChar() returns 0 at the end of the input; a literal NUL byte in
    // the document takes this branch as well.
286  case 0:
287  token.type_ = tokenEndOfStream;
288  break;
289  default:
290  ok = false;
291  break;
292  }
293  if (!ok)
294  token.type_ = tokenError;
295  token.end_ = current_;
296  return ok;
297 }
298 
// Advances current_ past any run of spaces, tabs, carriage returns and line
// feeds, stopping at the first other character or at end_.
299 void Reader::skipSpaces() {
300  while (current_ != end_) {
301  Char c = *current_;
302  if (c == ' ' || c == '\t' || c == '\r' || c == '\n')
303  ++current_;
304  else
305  break;
306  }
307 }
308 
// Tests whether the next patternLength characters of the input equal
// `pattern`. On success current_ is advanced past them and true is returned.
// On a mismatch, or when fewer than patternLength characters remain, false is
// returned and current_ is left unchanged.
309 bool Reader::match(const Char* pattern, int patternLength) {
310  if (end_ - current_ < patternLength)
311  return false;
    // Compare from the last character back to the first.
312  int index = patternLength;
313  while (index--)
314  if (current_[index] != pattern[index])
315  return false;
316  current_ += patternLength;
317  return true;
318 }
319 
// Reads a comment whose leading '/' has already been consumed by readToken().
// The next character selects the form: '*' for a C-style comment, '/' for a
// C++-style one; anything else makes the call fail.
// When comments are being collected, the comment text (including its leading
// '/') is handed to addComment() with a placement chosen below.
// Returns false if the comment is malformed.
320 bool Reader::readComment() {
321  Location commentBegin = current_ - 1;
322  Char c = getNextChar();
323  bool successful = false;
324  if (c == '*')
325  successful = readCStyleComment();
326  else if (c == '/')
327  successful = readCppStyleComment();
328  if (!successful)
329  return false;
330 
331  if (collectComments_) {
    // A comment is attached to the previous value "on the same line" when no
    // newline separates that value from the comment and, for a C-style
    // comment, the comment itself does not span a newline.
332  CommentPlacement placement = commentBefore;
333  if (lastValueEnd_ && !containsNewLine(lastValueEnd_, commentBegin)) {
334  if (c != '*' || !containsNewLine(commentBegin, current_))
335  placement = commentAfterOnSameLine;
336  }
337 
338  addComment(commentBegin, current_, placement);
339  }
340  return true;
341 }
342 
// Returns a copy of [begin, end) in which every "\r\n" pair and every lone
// '\r' is replaced by a single '\n'. All other characters are copied as is.
343 String Reader::normalizeEOL(Reader::Location begin, Reader::Location end) {
344  String normalized;
    // The result is never longer than the input.
345  normalized.reserve(static_cast<size_t>(end - begin));
346  Reader::Location current = begin;
347  while (current != end) {
348  char c = *current++;
349  if (c == '\r') {
350  if (current != end && *current == '\n')
351  // convert dos EOL
352  ++current;
353  // convert Mac EOL
354  normalized += '\n';
355  } else {
356  normalized += c;
357  }
358  }
359  return normalized;
360 }
361 
// Stores the comment text [begin, end) after normalizing its line endings.
// A commentAfterOnSameLine comment is attached directly to lastValue_; any
// other placement is appended to commentsBefore_ for a later value.
// Must only be called while comments are being collected (asserted).
362 void Reader::addComment(Location begin, Location end,
363  CommentPlacement placement) {
364  assert(collectComments_);
365  const String& normalized = normalizeEOL(begin, end);
366  if (placement == commentAfterOnSameLine) {
367  assert(lastValue_ != nullptr);
368  lastValue_->setComment(normalized, placement);
369  } else {
370  commentsBefore_ += normalized;
371  }
372 }
373 
// Consumes the body of a C-style comment whose opening "/*" has already been
// read, scanning for the closing "*/". Returns true when the last character
// consumed is '/'.
// NOTE(review): when the loop runs out of input without seeing "*/", the final
// getNextChar() still consumes the last remaining character, so an
// unterminated comment whose final byte is '/' (for example "/*/" or "/*a/")
// is reported as well-formed. Confirm whether that leniency is intended.
374 bool Reader::readCStyleComment() {
    // The (current_ + 1) bound guarantees *current_ is readable after
    // getNextChar() has advanced.
375  while ((current_ + 1) < end_) {
376  Char c = getNextChar();
377  if (c == '*' && *current_ == '/')
378  break;
379  }
380  return getNextChar() == '/';
381 }
382 
// Consumes the rest of a C++-style comment, up to and including the line
// ending ("\n", "\r\n" or a lone "\r") or up to the end of the input.
// Always returns true.
383 bool Reader::readCppStyleComment() {
384  while (current_ != end_) {
385  Char c = getNextChar();
386  if (c == '\n')
387  break;
388  if (c == '\r') {
389  // Consume DOS EOL. It will be normalized in addComment.
390  if (current_ != end_ && *current_ == '\n')
391  getNextChar();
392  // Break on Mac OS 9 EOL.
393  break;
394  }
395  }
396  return true;
397 }
398 
// Advances current_ past the remainder of a number token. The first character
// of the token was already consumed by readToken().
// The scan accepts digits, then an optional '.' followed by digits, then an
// optional 'e'/'E' with an optional sign followed by digits. No validation is
// done here; the token text is checked when it is decoded.
//
// Each step uses the idiom `c = (current_ = p) < end_ ? *p++ : '\0'`: it first
// commits everything scanned so far by moving current_ up to p, then fetches
// the next character (or '\0' at the end of input). As a result current_ ends
// up on the first character that is not part of the number.
399 void Reader::readNumber() {
400  Location p = current_;
401  char c = '0'; // stopgap for already consumed character
402  // integral part
403  while (c >= '0' && c <= '9')
404  c = (current_ = p) < end_ ? *p++ : '\0';
405  // fractional part
406  if (c == '.') {
407  c = (current_ = p) < end_ ? *p++ : '\0';
408  while (c >= '0' && c <= '9')
409  c = (current_ = p) < end_ ? *p++ : '\0';
410  }
411  // exponential part
412  if (c == 'e' || c == 'E') {
413  c = (current_ = p) < end_ ? *p++ : '\0';
414  if (c == '+' || c == '-')
415  c = (current_ = p) < end_ ? *p++ : '\0';
416  while (c >= '0' && c <= '9')
417  c = (current_ = p) < end_ ? *p++ : '\0';
418  }
419 }
420 
// Consumes a string token whose opening '"' has already been read, up to and
// including the closing '"'. A backslash causes the character after it to be
// skipped, so an escaped quote does not end the string. Escapes are not
// interpreted here.
// Returns true if a closing quote was found before the end of the input.
421 bool Reader::readString() {
422  Char c = '\0';
423  while (current_ != end_) {
424  c = getNextChar();
425  if (c == '\\')
426  getNextChar();
427  else if (c == '"')
428  break;
429  }
430  return c == '"';
431 }
432 
// Parses the members of an object whose '{' has already been consumed.
//   token  the '{' token; its start gives the object's start offset.
// currentValue() is replaced by an empty object and filled member by member.
// Returns true once the closing '}' is read. On any error an error is
// recorded, tokens are skipped up to the next '}' (or the end of the input),
// and false is returned.
433 bool Reader::readObject(Token& token) {
434  Token tokenName;
435  String name;
436  Value init(objectValue);
437  currentValue().swapPayload(init);
438  currentValue().setOffsetStart(token.start_ - begin_);
439  while (readTokenSkippingComments(tokenName)) {
    // `name` is empty before the first member has been read (and after a
    // member whose key is the empty string), so a '}' is accepted here only
    // in those situations; after any other member a '}' following a ','
    // falls through to the error at the bottom.
440  if (tokenName.type_ == tokenObjectEnd && name.empty()) // empty object
441  return true;
442  name.clear();
443  if (tokenName.type_ == tokenString) {
444  if (!decodeString(tokenName, name))
445  return recoverFromError(tokenObjectEnd);
446  } else if (tokenName.type_ == tokenNumber && features_.allowNumericKeys_) {
447  Value numberName;
448  if (!decodeNumber(tokenName, numberName))
449  return recoverFromError(tokenObjectEnd);
450  name = numberName.asString();
451  } else {
452  break;
453  }
454 
455  Token colon;
456  if (!readToken(colon) || colon.type_ != tokenMemberSeparator) {
457  return addErrorAndRecover("Missing ':' after object member name", colon,
458  tokenObjectEnd);
459  }
    // Parse the member value in place: push its slot, read, pop.
460  Value& value = currentValue()[name];
461  nodes_.push(&value);
462  bool ok = readValue();
463  nodes_.pop();
464  if (!ok) // error already set
465  return recoverFromError(tokenObjectEnd);
466 
467  Token comma;
468  if (!readTokenSkippingComments(comma) ||
469  (comma.type_ != tokenObjectEnd && comma.type_ != tokenArraySeparator)) {
470  return addErrorAndRecover("Missing ',' or '}' in object declaration",
471  comma, tokenObjectEnd);
472  }
473  if (comma.type_ == tokenObjectEnd)
474  return true;
475  }
476  return addErrorAndRecover("Missing '}' or object member name", tokenName,
477  tokenObjectEnd);
478 }
479 
// Parses the elements of an array whose '[' has already been consumed.
//   token  the '[' token; its start gives the array's start offset.
// currentValue() is replaced by an empty array and filled element by element.
// Returns true once the closing ']' is read. On any error an error is
// recorded, tokens are skipped up to the next ']' (or the end of the input),
// and false is returned.
480 bool Reader::readArray(Token& token) {
481  Value init(arrayValue);
482  currentValue().swapPayload(init);
483  currentValue().setOffsetStart(token.start_ - begin_);
484  skipSpaces();
485  if (current_ != end_ && *current_ == ']') // empty array
486  {
487  Token endArray;
488  readToken(endArray);
489  return true;
490  }
491  int index = 0;
492  for (;;) {
    // Parse the element in place: push its slot, read, pop.
493  Value& value = currentValue()[index++];
494  nodes_.push(&value);
495  bool ok = readValue();
496  nodes_.pop();
497  if (!ok) // error already set
498  return recoverFromError(tokenArrayEnd);
499 
500  Token currentToken;
501  // Accept Comment after last item in the array.
502  ok = readTokenSkippingComments(currentToken);
503  bool badTokenType = (currentToken.type_ != tokenArraySeparator &&
504  currentToken.type_ != tokenArrayEnd);
505  if (!ok || badTokenType) {
506  return addErrorAndRecover("Missing ',' or ']' in array declaration",
507  currentToken, tokenArrayEnd);
508  }
509  if (currentToken.type_ == tokenArrayEnd)
510  break;
511  }
512  return true;
513 }
514 
// Decodes a number token and stores the result in currentValue(), together
// with the token's start and end offsets. Returns false, leaving
// currentValue() untouched, if the token cannot be decoded.
515 bool Reader::decodeNumber(Token& token) {
516  Value decoded;
517  if (!decodeNumber(token, decoded))
518  return false;
519  currentValue().swapPayload(decoded);
520  currentValue().setOffsetStart(token.start_ - begin_);
521  currentValue().setOffsetLimit(token.end_ - begin_);
522  return true;
523 }
524 
// Decodes the text of a number token into `decoded`.
// The token is first read as an integer. If it contains a non-digit character
// (after an optional leading '-') or would overflow the largest integer type,
// decoding is handed over to decodeDouble().
// Returns true on success; false only if decodeDouble() rejects the token.
525 bool Reader::decodeNumber(Token& token, Value& decoded) {
526  // Attempts to parse the number as an integer. If the number is
527  // larger than the maximum supported value of an integer then
528  // we decode the number as a double.
529  Location current = token.start_;
530  bool isNegative = *current == '-';
531  if (isNegative)
532  ++current;
533  // TODO: Help the compiler do the div and mod at compile time or get rid of
534  // them.
    // Largest magnitude that still fits: |minLargestInt| for negative input,
    // maxLargestUInt otherwise.
535  Value::LargestUInt maxIntegerValue =
536  isNegative ? Value::LargestUInt(Value::maxLargestInt) + 1
537  : Value::maxLargestUInt;
538  Value::LargestUInt threshold = maxIntegerValue / 10;
539  Value::LargestUInt value = 0;
540  while (current < token.end_) {
541  Char c = *current++;
542  if (c < '0' || c > '9')
543  return decodeDouble(token, decoded);
544  auto digit(static_cast<Value::UInt>(c - '0'));
545  if (value >= threshold) {
546  // We've hit or exceeded the max value divided by 10 (rounded down). If
547  // a) we've only just touched the limit, b) this is the last digit, and
548  // c) it's small enough to fit in that rounding delta, we're okay.
549  // Otherwise treat this number as a double to avoid overflow.
550  if (value > threshold || current != token.end_ ||
551  digit > maxIntegerValue % 10) {
552  return decodeDouble(token, decoded);
553  }
554  }
555  value = value * 10 + digit;
556  }
    // Pick the narrowest representation: the most negative value cannot be
    // produced by negating a LargestInt, so it is assigned directly.
557  if (isNegative && value == maxIntegerValue)
558  decoded = Value::minLargestInt;
559  else if (isNegative)
560  decoded = -Value::LargestInt(value);
561  else if (value <= Value::LargestUInt(Value::maxInt))
562  decoded = Value::LargestInt(value);
563  else
564  decoded = value;
565  return true;
566 }
567 
// Decodes a number token as a double and stores the result in currentValue(),
// together with the token's start and end offsets. Returns false, leaving
// currentValue() untouched, if the token cannot be decoded.
568 bool Reader::decodeDouble(Token& token) {
569  Value decoded;
570  if (!decodeDouble(token, decoded))
571  return false;
572  currentValue().swapPayload(decoded);
573  currentValue().setOffsetStart(token.start_ - begin_);
574  currentValue().setOffsetLimit(token.end_ - begin_);
575  return true;
576 }
577 
// Parses the token text as a double, using a string stream imbued with the
// classic locale, and stores it in `decoded`.
// If stream extraction fails, the value left in `value` decides the outcome:
// the largest or lowest finite double is turned into +/- infinity, an
// infinity is kept, and anything else records a "is not a number" error and
// returns false.
578 bool Reader::decodeDouble(Token& token, Value& decoded) {
579  double value = 0;
580  IStringStream is(String(token.start_, token.end_));
581  is.imbue(std::locale::classic());
582  if (!(is >> value)) {
583  if (value == std::numeric_limits<double>::max())
584  value = std::numeric_limits<double>::infinity();
585  else if (value == std::numeric_limits<double>::lowest())
586  value = -std::numeric_limits<double>::infinity();
587  else if (!std::isinf(value))
588  return addError(
589  "'" + String(token.start_, token.end_) + "' is not a number.", token);
590  }
591  decoded = value;
592  return true;
593 }
594 
// Decodes a string token and stores the result in currentValue(), together
// with the token's start and end offsets. Returns false, leaving
// currentValue() untouched, if the token cannot be decoded.
595 bool Reader::decodeString(Token& token) {
596  String decoded_string;
597  if (!decodeString(token, decoded_string))
598  return false;
599  Value decoded(decoded_string);
600  currentValue().swapPayload(decoded);
601  currentValue().setOffsetStart(token.start_ - begin_);
602  currentValue().setOffsetLimit(token.end_ - begin_);
603  return true;
604 }
605 
// Decodes the contents of a string token (the text between its quotes) and
// appends them to `decoded`.
// Backslash escapes are translated, \u escapes are converted to UTF-8, and
// raw characters below 0x20 are rejected.
// Returns false after recording an error for an empty or unknown escape, a
// bad \u sequence, or a control character.
606 bool Reader::decodeString(Token& token, String& decoded) {
    // The token length minus the two quotes is an upper bound on the result
    // the token itself contributes.
607  decoded.reserve(static_cast<size_t>(token.end_ - token.start_ - 2));
608  Location current = token.start_ + 1; // skip '"'
609  Location end = token.end_ - 1; // do not include '"'
610  while (current != end) {
611  Char c = *current++;
612  if (c == '"')
613  break;
614  if (c == '\\') {
615  if (current == end)
616  return addError("Empty escape sequence in string", token, current);
617  Char escape = *current++;
618  switch (escape) {
619  case '"':
620  decoded += '"';
621  break;
622  case '/':
623  decoded += '/';
624  break;
625  case '\\':
626  decoded += '\\';
627  break;
628  case 'b':
629  decoded += '\b';
630  break;
631  case 'f':
632  decoded += '\f';
633  break;
634  case 'n':
635  decoded += '\n';
636  break;
637  case 'r':
638  decoded += '\r';
639  break;
640  case 't':
641  decoded += '\t';
642  break;
643  case 'u': {
644  unsigned int unicode;
645  if (!decodeUnicodeCodePoint(token, current, end, unicode))
646  return false;
647  decoded += codePointToUTF8(unicode);
648  } break;
649  default:
650  return addError("Bad escape sequence in string", token, current);
651  }
652  } else {
653  if (static_cast<unsigned char>(c) < 0x20)
654  return addError("Control character in string", token, current - 1);
655  decoded += c;
656  }
657  }
658  return true;
659 }
660 
// Decodes one \u escape (whose "\u" has already been consumed) into a code
// point. If the decoded unit lies in 0xD800..0xDBFF, a second "\uXXXX" escape
// must follow and the two units are combined into one code point.
//   current  in/out: advanced past everything that was consumed.
//   end      end of the string contents.
//   unicode  out: the resulting code point.
// Returns false after recording an error if an escape is malformed or the
// second half of a pair is missing.
// NOTE(review): the second unit is not checked to lie in 0xDC00..0xDFFF; only
// its low 10 bits are used.
661 bool Reader::decodeUnicodeCodePoint(Token& token, Location& current,
662  Location end, unsigned int& unicode) {
663 
664  if (!decodeUnicodeEscapeSequence(token, current, end, unicode))
665  return false;
666  if (unicode >= 0xD800 && unicode <= 0xDBFF) {
667  // surrogate pairs
    // Six characters are needed for the "\uXXXX" that must follow.
668  if (end - current < 6)
669  return addError(
670  "additional six characters expected to parse unicode surrogate pair.",
671  token, current);
672  if (*(current++) == '\\' && *(current++) == 'u') {
673  unsigned int surrogatePair;
674  if (decodeUnicodeEscapeSequence(token, current, end, surrogatePair)) {
675  unicode = 0x10000 + ((unicode & 0x3FF) << 10) + (surrogatePair & 0x3FF);
676  } else
677  return false;
678  } else
679  return addError("expecting another \\u token to begin the second half of "
680  "a unicode surrogate pair",
681  token, current);
682  }
683  return true;
684 }
685 
// Reads exactly four hexadecimal digits starting at `current` and stores
// their value in ret_unicode. Upper- and lower-case digits are accepted.
//   current  in/out: advanced past the characters that were read.
// Returns false after recording an error if fewer than four characters remain
// or one of them is not a hexadecimal digit; ret_unicode is not written then.
686 bool Reader::decodeUnicodeEscapeSequence(Token& token, Location& current,
687  Location end,
688  unsigned int& ret_unicode) {
689  if (end - current < 4)
690  return addError(
691  "Bad unicode escape sequence in string: four digits expected.", token,
692  current);
693  int unicode = 0;
694  for (int index = 0; index < 4; ++index) {
695  Char c = *current++;
696  unicode *= 16;
697  if (c >= '0' && c <= '9')
698  unicode += c - '0';
699  else if (c >= 'a' && c <= 'f')
700  unicode += c - 'a' + 10;
701  else if (c >= 'A' && c <= 'F')
702  unicode += c - 'A' + 10;
703  else
704  return addError(
705  "Bad unicode escape sequence in string: hexadecimal digit expected.",
706  token, current);
707  }
708  ret_unicode = static_cast<unsigned int>(unicode);
709  return true;
710 }
711 
// Appends an error to errors_.
//   message  description of the problem.
//   token    token the error refers to (copied).
//   extra    secondary location stored alongside the error.
// Always returns false, so callers can write `return addError(...)`.
712 bool Reader::addError(const String& message, Token& token, Location extra) {
713  ErrorInfo info;
714  info.token_ = token;
715  info.message_ = message;
716  info.extra_ = extra;
717  errors_.push_back(info);
718  return false;
719 }
720 
// Skips tokens until one of type skipUntilToken, or the end of the stream, is
// read. Errors raised while skipping are dropped by shrinking errors_ back to
// the size it had on entry. Always returns false.
721 bool Reader::recoverFromError(TokenType skipUntilToken) {
722  size_t const errorCount = errors_.size();
723  Token skip;
724  for (;;) {
725  if (!readToken(skip))
726  errors_.resize(errorCount); // discard errors caused by recovery
727  if (skip.type_ == skipUntilToken || skip.type_ == tokenEndOfStream)
728  break;
729  }
730  errors_.resize(errorCount);
731  return false;
732 }
733 
// Records an error for `token`, then skips ahead with recoverFromError().
// Always returns false.
734 bool Reader::addErrorAndRecover(const String& message, Token& token,
735  TokenType skipUntilToken) {
736  addError(message, token);
737  return recoverFromError(skipUntilToken);
738 }
739 
// Returns the value on top of nodes_, i.e. the one currently being filled.
// nodes_ must not be empty.
740 Value& Reader::currentValue() { return *(nodes_.top()); }
741 
// Returns the character at current_ and advances past it, or returns 0
// without advancing when current_ has reached end_.
742 Reader::Char Reader::getNextChar() {
743  if (current_ == end_)
744  return 0;
745  return *current_++;
746 }
747 
// Computes the 1-based line and column of `location` by scanning the document
// from begin_. "\r\n", a lone '\r' and a lone '\n' each count as one line
// break.
//   line, column  out: position of `location`.
748 void Reader::getLocationLineAndColumn(Location location, int& line,
749  int& column) const {
750  Location current = begin_;
751  Location lastLineStart = current;
752  line = 0;
753  while (current < location && current != end_) {
754  Char c = *current++;
755  if (c == '\r') {
756  if (current != end_ && *current == '\n')
757  ++current;
758  lastLineStart = current;
759  ++line;
760  } else if (c == '\n') {
761  lastLineStart = current;
762  ++line;
763  }
764  }
765  // column & line start at 1
766  column = int(location - lastLineStart) + 1;
767  ++line;
768 }
769 
// Returns the position of `location` formatted as "Line <n>, Column <m>".
770 String Reader::getLocationLineAndColumn(Location location) const {
771  int line, column;
772  getLocationLineAndColumn(location, line, column);
    // Room for the fixed text plus two formatted ints and the terminator.
773  char buffer[18 + 16 + 16 + 1];
774  jsoncpp_snprintf(buffer, sizeof(buffer), "Line %d, Column %d", line, column);
775  return buffer;
776 }
777 
778 // Deprecated. Preserved for backward compatibility
// Misspelled alias ("Formated"): forwards to getFormattedErrorMessages().
779 String Reader::getFormatedErrorMessages() const {
780  return getFormattedErrorMessages();
781 }
782 
// Returns all recorded errors as one human-readable string. Each error
// contributes a "* Line l, Column c" line for the start of its token, a line
// with the message and, when a secondary location was stored, a
// "See Line l, Column c for detail." line. Empty when there are no errors.
783 String Reader::getFormattedErrorMessages() const {
784  String formattedMessage;
785  for (const auto& error : errors_) {
786  formattedMessage +=
787  "* " + getLocationLineAndColumn(error.token_.start_) + "\n";
788  formattedMessage += " " + error.message_ + "\n";
789  if (error.extra_)
790  formattedMessage +=
791  "See " + getLocationLineAndColumn(error.extra_) + " for detail.\n";
792  }
793  return formattedMessage;
794 }
795 
// Returns the recorded errors in structured form: for each error, the byte
// offsets of its token relative to begin_ and the message text.
796 std::vector<Reader::StructuredError> Reader::getStructuredErrors() const {
797  std::vector<Reader::StructuredError> allErrors;
798  for (const auto& error : errors_) {
799  Reader::StructuredError structured;
800  structured.offset_start = error.token_.start_ - begin_;
801  structured.offset_limit = error.token_.end_ - begin_;
802  structured.message = error.message_;
803  allErrors.push_back(structured);
804  }
805  return allErrors;
806 }
807 
// Adds an error attached to `value`, located through the offsets stored in
// the value.
// Returns false, recording nothing, when either offset lies beyond the length
// of the current document; returns true otherwise.
808 bool Reader::pushError(const Value& value, const String& message) {
809  ptrdiff_t const length = end_ - begin_;
810  if (value.getOffsetStart() > length || value.getOffsetLimit() > length)
811  return false;
812  Token token;
813  token.type_ = tokenError;
814  token.start_ = begin_ + value.getOffsetStart();
815  token.end_ = begin_ + value.getOffsetLimit();
816  ErrorInfo info;
817  info.token_ = token;
818  info.message_ = message;
819  info.extra_ = nullptr;
820  errors_.push_back(info);
821  return true;
822 }
823 
// Adds an error attached to `value`, with the start of `extra` stored as the
// secondary location.
// Returns false, recording nothing, when an offset of `value` or the limit
// offset of `extra` lies beyond the length of the current document.
// NOTE(review): the range check uses extra.getOffsetLimit() while the stored
// location uses extra.getOffsetStart().
824 bool Reader::pushError(const Value& value, const String& message,
825  const Value& extra) {
826  ptrdiff_t const length = end_ - begin_;
827  if (value.getOffsetStart() > length || value.getOffsetLimit() > length ||
828  extra.getOffsetLimit() > length)
829  return false;
830  Token token;
831  token.type_ = tokenError;
832  token.start_ = begin_ + value.getOffsetStart();
833  token.end_ = begin_ + value.getOffsetLimit();
834  ErrorInfo info;
835  info.token_ = token;
836  info.message_ = message;
837  info.extra_ = begin_ + extra.getOffsetStart();
838  errors_.push_back(info);
839  return true;
840 }
841 
// Returns true when no errors are currently recorded.
842 bool Reader::good() const { return errors_.empty(); }
843 
844 // Originally copied from the Features class (now deprecated), used internally
845 // for features implementation.
// Set of parsing options consumed by OurReader.
// The members have no default initializers here; OurFeatures::all() produces
// a value-initialized object (every flag false, stackLimit_ zero).
// NOTE(review): only some flags are read in the portion of the file reviewed
// (allowComments_, strictRoot_, failIfExtra_, skipBom_, stackLimit_,
// allowDroppedNullPlaceholders_); the meaning of the rest should be confirmed
// against their use sites.
846 class OurFeatures {
847 public:
848  static OurFeatures all();
849  bool allowComments_;
850  bool allowTrailingCommas_;
851  bool strictRoot_;
852  bool allowDroppedNullPlaceholders_;
853  bool allowNumericKeys_;
854  bool allowSingleQuotes_;
855  bool failIfExtra_;
856  bool rejectDupKeys_;
857  bool allowSpecialFloats_;
858  bool skipBom_;
    // Compared against nodes_.size() in OurReader::readValue().
859  size_t stackLimit_;
860 }; // OurFeatures
861 
// Returns a value-initialized OurFeatures: all flags false, stackLimit_ zero.
862 OurFeatures OurFeatures::all() { return {}; }
863 
864 // Implementation of class OurReader
865 // ////////////////////////////////
866 
867 // Originally copied from the Reader class (now deprecated), used internally
868 // for implementing JSON reading.
869 class OurReader {
870 public:
871  using Char = char;
872  using Location = const Char*;
873 
874  explicit OurReader(OurFeatures const& features);
875  bool parse(const char* beginDoc, const char* endDoc, Value& root,
876  bool collectComments = true);
877  String getFormattedErrorMessages() const;
878  std::vector<CharReader::StructuredError> getStructuredErrors() const;
879 
880 private:
881  OurReader(OurReader const&); // no impl
882  void operator=(OurReader const&); // no impl
883 
884  enum TokenType {
885  tokenEndOfStream = 0,
886  tokenObjectBegin,
887  tokenObjectEnd,
888  tokenArrayBegin,
889  tokenArrayEnd,
890  tokenString,
891  tokenNumber,
892  tokenTrue,
893  tokenFalse,
894  tokenNull,
895  tokenNaN,
896  tokenPosInf,
897  tokenNegInf,
898  tokenArraySeparator,
899  tokenMemberSeparator,
900  tokenComment,
901  tokenError
902  };
903 
904  class Token {
905  public:
906  TokenType type_;
907  Location start_;
908  Location end_;
909  };
910 
911  class ErrorInfo {
912  public:
913  Token token_;
914  String message_;
915  Location extra_;
916  };
917 
918  using Errors = std::deque<ErrorInfo>;
919 
920  bool readToken(Token& token);
921  bool readTokenSkippingComments(Token& token);
922  void skipSpaces();
923  void skipBom(bool skipBom);
924  bool match(const Char* pattern, int patternLength);
925  bool readComment();
926  bool readCStyleComment(bool* containsNewLineResult);
927  bool readCppStyleComment();
928  bool readString();
929  bool readStringSingleQuote();
930  bool readNumber(bool checkInf);
931  bool readValue();
932  bool readObject(Token& token);
933  bool readArray(Token& token);
934  bool decodeNumber(Token& token);
935  bool decodeNumber(Token& token, Value& decoded);
936  bool decodeString(Token& token);
937  bool decodeString(Token& token, String& decoded);
938  bool decodeDouble(Token& token);
939  bool decodeDouble(Token& token, Value& decoded);
940  bool decodeUnicodeCodePoint(Token& token, Location& current, Location end,
941  unsigned int& unicode);
942  bool decodeUnicodeEscapeSequence(Token& token, Location& current,
943  Location end, unsigned int& unicode);
944  bool addError(const String& message, Token& token, Location extra = nullptr);
945  bool recoverFromError(TokenType skipUntilToken);
946  bool addErrorAndRecover(const String& message, Token& token,
947  TokenType skipUntilToken);
948  void skipUntilSpace();
949  Value& currentValue();
950  Char getNextChar();
951  void getLocationLineAndColumn(Location location, int& line,
952  int& column) const;
953  String getLocationLineAndColumn(Location location) const;
954  void addComment(Location begin, Location end, CommentPlacement placement);
955 
956  static String normalizeEOL(Location begin, Location end);
957  static bool containsNewLine(Location begin, Location end);
958 
959  using Nodes = std::stack<Value*>;
960 
961  Nodes nodes_{};
962  Errors errors_{};
963  String document_{};
964  Location begin_ = nullptr;
965  Location end_ = nullptr;
966  Location current_ = nullptr;
967  Location lastValueEnd_ = nullptr;
968  Value* lastValue_ = nullptr;
969  bool lastValueHasAComment_ = false;
970  String commentsBefore_{};
971 
972  OurFeatures const features_;
973  bool collectComments_ = false;
974 }; // OurReader
975 
976 // complete copy of Reader impl, for OurReader
977 
978 // Test-only instrumentation: total bytes examined by
979 // OurReader::containsNewLine, so unit tests can assert that comment handling
980 // stays linear in the input rather than quadratic in the comment count (see
981 // CharReaderTest/parseCommentsAfterValueScansLinearly). thread_local so it
982 // never races during concurrent parsing; the increment is negligible and only
983 // runs while parsing comments. Not part of the supported public API.
// Returns a reference to a per-thread counter; OurReader::containsNewLine()
// adds the number of bytes it scans to it.
// NOTE(review): return type and empty parameter list follow from the call
// site and the body; confirm any linkage or export qualifier on this line
// against the declaration used by the unit tests.
984 size_t& newlineScanByteCountForTesting() {
985  static thread_local size_t count = 0;
986  return count;
987 }
988 
// Returns true if the half-open range [begin, end) contains a '\n' or a '\r'.
// The length of the range is first added to the test-only byte counter.
989 bool OurReader::containsNewLine(OurReader::Location begin,
990  OurReader::Location end) {
991  newlineScanByteCountForTesting() += static_cast<size_t>(end - begin);
992  return std::any_of(begin, end, [](char b) { return b == '\n' || b == '\r'; });
993 }
994 
// Constructs a reader that parses according to a copy of `features`.
995 OurReader::OurReader(OurFeatures const& features) : features_(features) {}
996 
// Parses the character range [beginDoc, endDoc) into `root`.
//   beginDoc, endDoc  bounds of the document text; stored in begin_/end_, so
//                     the range must stay valid while locations are in use.
//   root              receives the parsed value.
//   collectComments   requests comment collection; forced to false when
//                     features_.allowComments_ is off.
// Returns the result of readValue(), with two exceptions that record an error
// and return false: with features_.failIfExtra_ set, anything other than end
// of stream after the root value; with features_.strictRoot_ set, a root that
// is neither an array nor an object.
997 bool OurReader::parse(const char* beginDoc, const char* endDoc, Value& root,
998  bool collectComments) {
999  if (!features_.allowComments_) {
1000  collectComments = false;
1001  }
1002 
    // Reset all per-parse state so the same reader can be reused.
1003  begin_ = beginDoc;
1004  end_ = endDoc;
1005  collectComments_ = collectComments;
1006  current_ = begin_;
1007  lastValueEnd_ = nullptr;
1008  lastValue_ = nullptr;
1009  commentsBefore_.clear();
1010  errors_.clear();
1011  while (!nodes_.empty())
1012  nodes_.pop();
1013  nodes_.push(&root);
1014 
1015  // skip byte order mark if it exists at the beginning of the UTF-8 text.
1016  skipBom(features_.skipBom_);
1017  bool successful = readValue();
1018  nodes_.pop();
    // Read one token past the root value: it reveals trailing content and
    // lets comments after the value accumulate in commentsBefore_.
1019  Token token;
1020  readTokenSkippingComments(token);
1021  if (features_.failIfExtra_ && (token.type_ != tokenEndOfStream)) {
1022  addError("Extra non-whitespace after JSON value.", token);
1023  return false;
1024  }
1025  if (collectComments_ && !commentsBefore_.empty())
1026  root.setComment(commentsBefore_, commentAfter);
1027  if (features_.strictRoot_) {
1028  if (!root.isArray() && !root.isObject()) {
1029  // Set error location to start of doc, ideally should be first token found
1030  // in doc
1031  token.type_ = tokenError;
1032  token.start_ = beginDoc;
1033  token.end_ = endDoc;
1034  addError(
1035  "A valid JSON document must be either an array or an object value.",
1036  token);
1037  return false;
1038  }
1039  }
1040  return successful;
1041 }
1042 
1043 bool OurReader::readValue() {
1044  // To preserve the old behaviour we cast size_t to int.
1045  if (nodes_.size() > features_.stackLimit_)
1046  throwRuntimeError("Exceeded stackLimit in readValue().");
1047  Token token;
1048  readTokenSkippingComments(token);
1049  bool successful = true;
1050 
1051  if (collectComments_ && !commentsBefore_.empty()) {
1052  currentValue().setComment(commentsBefore_, commentBefore);
1053  commentsBefore_.clear();
1054  }
1055 
1056  switch (token.type_) {
1057  case tokenObjectBegin:
1058  successful = readObject(token);
1059  currentValue().setOffsetLimit(current_ - begin_);
1060  break;
1061  case tokenArrayBegin:
1062  successful = readArray(token);
1063  currentValue().setOffsetLimit(current_ - begin_);
1064  break;
1065  case tokenNumber:
1066  successful = decodeNumber(token);
1067  break;
1068  case tokenString:
1069  successful = decodeString(token);
1070  break;
1071  case tokenTrue: {
1072  Value v(true);
1073  currentValue().swapPayload(v);
1074  currentValue().setOffsetStart(token.start_ - begin_);
1075  currentValue().setOffsetLimit(token.end_ - begin_);
1076  } break;
1077  case tokenFalse: {
1078  Value v(false);
1079  currentValue().swapPayload(v);
1080  currentValue().setOffsetStart(token.start_ - begin_);
1081  currentValue().setOffsetLimit(token.end_ - begin_);
1082  } break;
1083  case tokenNull: {
1084  Value v;
1085  currentValue().swapPayload(v);
1086  currentValue().setOffsetStart(token.start_ - begin_);
1087  currentValue().setOffsetLimit(token.end_ - begin_);
1088  } break;
1089  case tokenNaN: {
1090  Value v(std::numeric_limits<double>::quiet_NaN());
1091  currentValue().swapPayload(v);
1092  currentValue().setOffsetStart(token.start_ - begin_);
1093  currentValue().setOffsetLimit(token.end_ - begin_);
1094  } break;
1095  case tokenPosInf: {
1096  Value v(std::numeric_limits<double>::infinity());
1097  currentValue().swapPayload(v);
1098  currentValue().setOffsetStart(token.start_ - begin_);
1099  currentValue().setOffsetLimit(token.end_ - begin_);
1100  } break;
1101  case tokenNegInf: {
1102  Value v(-std::numeric_limits<double>::infinity());
1103  currentValue().swapPayload(v);
1104  currentValue().setOffsetStart(token.start_ - begin_);
1105  currentValue().setOffsetLimit(token.end_ - begin_);
1106  } break;
1107  case tokenArraySeparator:
1108  case tokenObjectEnd:
1109  case tokenArrayEnd:
1110  if (features_.allowDroppedNullPlaceholders_) {
1111  // "Un-read" the current token and mark the current value as a null
1112  // token.
1113  current_--;
1114  Value v;
1115  currentValue().swapPayload(v);
1116  currentValue().setOffsetStart(current_ - begin_ - 1);
1117  currentValue().setOffsetLimit(current_ - begin_);
1118  break;
1119  } // else, fall through ...
1120  default:
1121  currentValue().setOffsetStart(token.start_ - begin_);
1122  currentValue().setOffsetLimit(token.end_ - begin_);
1123  return addError("Syntax error: value, object or array expected.", token);
1124  }
1125 
1126  if (collectComments_) {
1127  lastValueEnd_ = current_;
1128  lastValueHasAComment_ = false;
1129  lastValue_ = &currentValue();
1130  }
1131 
1132  return successful;
1133 }
1134 
1135 bool OurReader::readTokenSkippingComments(Token& token) {
1136  bool success = readToken(token);
1137  if (features_.allowComments_) {
1138  while (success && token.type_ == tokenComment) {
1139  success = readToken(token);
1140  }
1141  }
1142  return success;
1143 }
1144 
// Scans the next token starting at `current_`.
//
// Leading whitespace is skipped, then the first character selects the token
// type. On return `token.start_`/`token.end_` delimit the consumed text and
// `token.type_` holds the recognized type, or tokenError when the function
// returns false.
bool OurReader::readToken(Token& token) {
  skipSpaces();
  token.start_ = current_;
  Char c = getNextChar();
  bool ok = true;
  switch (c) {
  case '{':
    token.type_ = tokenObjectBegin;
    break;
  case '}':
    token.type_ = tokenObjectEnd;
    break;
  case '[':
    token.type_ = tokenArrayBegin;
    break;
  case ']':
    token.type_ = tokenArrayEnd;
    break;
  case '"':
    token.type_ = tokenString;
    ok = readString();
    break;
  case '\'':
    if (features_.allowSingleQuotes_) {
      token.type_ = tokenString;
      ok = readStringSingleQuote();
    } else {
      // If we don't allow single quotes, this is a failure case.
      ok = false;
    }
    break;
  case '/':
    token.type_ = tokenComment;
    ok = readComment();
    break;
  case '0':
  case '1':
  case '2':
  case '3':
  case '4':
  case '5':
  case '6':
  case '7':
  case '8':
  case '9':
    token.type_ = tokenNumber;
    readNumber(false);
    break;
  case '-':
    // readNumber(true) returns false only when the sign is followed by 'I',
    // which it consumes; the rest of "Infinity" is matched here.
    if (readNumber(true)) {
      token.type_ = tokenNumber;
    } else {
      token.type_ = tokenNegInf;
      ok = features_.allowSpecialFloats_ && match("nfinity", 7);
    }
    break;
  case '+':
    // Same handling as '-', but yielding positive infinity.
    if (readNumber(true)) {
      token.type_ = tokenNumber;
    } else {
      token.type_ = tokenPosInf;
      ok = features_.allowSpecialFloats_ && match("nfinity", 7);
    }
    break;
  case 't':
    token.type_ = tokenTrue;
    ok = match("rue", 3);
    break;
  case 'f':
    token.type_ = tokenFalse;
    ok = match("alse", 4);
    break;
  case 'n':
    token.type_ = tokenNull;
    ok = match("ull", 3);
    break;
  case 'N':
    if (features_.allowSpecialFloats_) {
      token.type_ = tokenNaN;
      ok = match("aN", 2);
    } else {
      ok = false;
    }
    break;
  case 'I':
    if (features_.allowSpecialFloats_) {
      token.type_ = tokenPosInf;
      ok = match("nfinity", 7);
    } else {
      ok = false;
    }
    break;
  case ',':
    token.type_ = tokenArraySeparator;
    break;
  case ':':
    token.type_ = tokenMemberSeparator;
    break;
  case 0:
    // getNextChar() returns 0 once the end of the input has been reached.
    token.type_ = tokenEndOfStream;
    break;
  default:
    ok = false;
    break;
  }
  if (!ok)
    token.type_ = tokenError;
  token.end_ = current_;
  return ok;
}
1255 
1256 void OurReader::skipSpaces() {
1257  while (current_ != end_) {
1258  Char c = *current_;
1259  if (c == ' ' || c == '\t' || c == '\r' || c == '\n')
1260  ++current_;
1261  else
1262  break;
1263  }
1264 }
1265 
1266 void OurReader::skipBom(bool skipBom) {
1267  // The default behavior is to skip BOM.
1268  if (skipBom) {
1269  if ((end_ - begin_) >= 3 && strncmp(begin_, "\xEF\xBB\xBF", 3) == 0) {
1270  begin_ += 3;
1271  current_ = begin_;
1272  }
1273  }
1274 }
1275 
1276 bool OurReader::match(const Char* pattern, int patternLength) {
1277  if (end_ - current_ < patternLength)
1278  return false;
1279  int index = patternLength;
1280  while (index--)
1281  if (current_[index] != pattern[index])
1282  return false;
1283  current_ += patternLength;
1284  return true;
1285 }
1286 
1287 bool OurReader::readComment() {
1288  const Location commentBegin = current_ - 1;
1289  const Char c = getNextChar();
1290  bool successful = false;
1291  bool cStyleWithEmbeddedNewline = false;
1292 
1293  const bool isCStyleComment = (c == '*');
1294  const bool isCppStyleComment = (c == '/');
1295  if (isCStyleComment) {
1296  successful = readCStyleComment(&cStyleWithEmbeddedNewline);
1297  } else if (isCppStyleComment) {
1298  successful = readCppStyleComment();
1299  }
1300 
1301  if (!successful)
1302  return false;
1303 
1304  if (collectComments_) {
1305  CommentPlacement placement = commentBefore;
1306 
1307  if (!lastValueHasAComment_) {
1308  if (lastValueEnd_ && !containsNewLine(lastValueEnd_, commentBegin)) {
1309  if (isCppStyleComment || !cStyleWithEmbeddedNewline) {
1310  placement = commentAfterOnSameLine;
1311  }
1312  }
1313  // The gap between the last value and this comment only grows as more
1314  // comments are consumed, so a later comment can never be on the same
1315  // line as that value. Mark it handled to avoid re-scanning the same
1316  // growing prefix for every following comment (quadratic behavior).
1317  lastValueHasAComment_ = true;
1318  }
1319 
1320  addComment(commentBegin, current_, placement);
1321  }
1322  return true;
1323 }
1324 
1325 String OurReader::normalizeEOL(OurReader::Location begin,
1326  OurReader::Location end) {
1327  String normalized;
1328  normalized.reserve(static_cast<size_t>(end - begin));
1329  OurReader::Location current = begin;
1330  while (current != end) {
1331  char c = *current++;
1332  if (c == '\r') {
1333  if (current != end && *current == '\n')
1334  // convert dos EOL
1335  ++current;
1336  // convert Mac EOL
1337  normalized += '\n';
1338  } else {
1339  normalized += c;
1340  }
1341  }
1342  return normalized;
1343 }
1344 
1345 void OurReader::addComment(Location begin, Location end,
1346  CommentPlacement placement) {
1347  assert(collectComments_);
1348  const String& normalized = normalizeEOL(begin, end);
1349  if (placement == commentAfterOnSameLine) {
1350  assert(lastValue_ != nullptr);
1351  lastValue_->setComment(normalized, placement);
1352  } else {
1353  commentsBefore_ += normalized;
1354  }
1355 }
1356 
1357 bool OurReader::readCStyleComment(bool* containsNewLineResult) {
1358  *containsNewLineResult = false;
1359 
1360  while ((current_ + 1) < end_) {
1361  Char c = getNextChar();
1362  if (c == '*' && *current_ == '/')
1363  break;
1364  if (c == '\n')
1365  *containsNewLineResult = true;
1366  }
1367 
1368  return getNextChar() == '/';
1369 }
1370 
1371 bool OurReader::readCppStyleComment() {
1372  while (current_ != end_) {
1373  Char c = getNextChar();
1374  if (c == '\n')
1375  break;
1376  if (c == '\r') {
1377  // Consume DOS EOL. It will be normalized in addComment.
1378  if (current_ != end_ && *current_ == '\n')
1379  getNextChar();
1380  // Break on Moc OS 9 EOL.
1381  break;
1382  }
1383  }
1384  return true;
1385 }
1386 
1387 bool OurReader::readNumber(bool checkInf) {
1388  Location p = current_;
1389  if (checkInf && p != end_ && *p == 'I') {
1390  current_ = ++p;
1391  return false;
1392  }
1393  char c = '0'; // stopgap for already consumed character
1394  // integral part
1395  while (c >= '0' && c <= '9')
1396  c = (current_ = p) < end_ ? *p++ : '\0';
1397  // fractional part
1398  if (c == '.') {
1399  c = (current_ = p) < end_ ? *p++ : '\0';
1400  while (c >= '0' && c <= '9')
1401  c = (current_ = p) < end_ ? *p++ : '\0';
1402  }
1403  // exponential part
1404  if (c == 'e' || c == 'E') {
1405  c = (current_ = p) < end_ ? *p++ : '\0';
1406  if (c == '+' || c == '-')
1407  c = (current_ = p) < end_ ? *p++ : '\0';
1408  while (c >= '0' && c <= '9')
1409  c = (current_ = p) < end_ ? *p++ : '\0';
1410  }
1411  return true;
1412 }
1413 bool OurReader::readString() {
1414  Char c = 0;
1415  while (current_ != end_) {
1416  c = getNextChar();
1417  if (c == '\\')
1418  getNextChar();
1419  else if (c == '"')
1420  break;
1421  }
1422  return c == '"';
1423 }
1424 
1425 bool OurReader::readStringSingleQuote() {
1426  Char c = 0;
1427  while (current_ != end_) {
1428  c = getNextChar();
1429  if (c == '\\')
1430  getNextChar();
1431  else if (c == '\'')
1432  break;
1433  }
1434  return c == '\'';
1435 }
1436 
// Reads the members of an object whose opening '{' is described by `token`.
//
// The current node is replaced by an empty object and then filled member by
// member. Returns true once the closing '}' has been consumed. On a syntax
// error an error is recorded, input is skipped up to the next '}' (or the end
// of the input) and false is returned.
bool OurReader::readObject(Token& token) {
  Token tokenName;
  String name;
  Value init(objectValue);
  currentValue().swapPayload(init);
  currentValue().setOffsetStart(token.start_ - begin_);
  while (readTokenSkippingComments(tokenName)) {
    // `name` is empty here either before the first member or after a member
    // whose key was the empty string.
    if (tokenName.type_ == tokenObjectEnd &&
        (name.empty() ||
         features_.allowTrailingCommas_)) // empty object or trailing comma
      return true;
    name.clear();
    if (tokenName.type_ == tokenString) {
      if (!decodeString(tokenName, name))
        return recoverFromError(tokenObjectEnd);
    } else if (tokenName.type_ == tokenNumber && features_.allowNumericKeys_) {
      // Numeric keys are stored under their string representation.
      Value numberName;
      if (!decodeNumber(tokenName, numberName))
        return recoverFromError(tokenObjectEnd);
      name = numberName.asString();
    } else {
      // Not a usable member name: report it after the loop.
      break;
    }
    if (name.length() >= (1U << 30))
      throwRuntimeError("keylength >= 2^30");
    if (features_.rejectDupKeys_ && currentValue().isMember(name)) {
      String msg = "Duplicate key: '" + name + "'";
      return addErrorAndRecover(msg, tokenName, tokenObjectEnd);
    }

    // Note: the separator is read with readToken(), so a comment between the
    // member name and ':' is not skipped here.
    Token colon;
    if (!readToken(colon) || colon.type_ != tokenMemberSeparator) {
      return addErrorAndRecover("Missing ':' after object member name", colon,
                                tokenObjectEnd);
    }
    // Parse the member value with its node on top of the node stack.
    Value& value = currentValue()[name];
    nodes_.push(&value);
    bool ok = readValue();
    nodes_.pop();
    if (!ok) // error already set
      return recoverFromError(tokenObjectEnd);

    Token comma;
    if (!readTokenSkippingComments(comma) ||
        (comma.type_ != tokenObjectEnd && comma.type_ != tokenArraySeparator)) {
      return addErrorAndRecover("Missing ',' or '}' in object declaration",
                                comma, tokenObjectEnd);
    }
    if (comma.type_ == tokenObjectEnd)
      return true;
  }
  return addErrorAndRecover("Missing '}' or object member name", tokenName,
                            tokenObjectEnd);
}
1491 
// Reads the elements of an array whose opening '[' is described by `token`.
//
// The current node is replaced by an empty array and then filled element by
// element. Returns true once the closing ']' has been consumed. On a syntax
// error an error is recorded, input is skipped up to the next ']' (or the end
// of the input) and false is returned.
bool OurReader::readArray(Token& token) {
  Value init(arrayValue);
  currentValue().swapPayload(init);
  currentValue().setOffsetStart(token.start_ - begin_);
  int index = 0;
  for (;;) {
    skipSpaces();
    // A ']' at this point closes an empty array, or follows a trailing comma
    // when trailing commas are allowed and dropped null placeholders are not.
    if (current_ != end_ && *current_ == ']' &&
        (index == 0 ||
         (features_.allowTrailingCommas_ &&
          !features_.allowDroppedNullPlaceholders_))) // empty array or trailing
                                                      // comma
    {
      Token endArray;
      readToken(endArray);
      return true;
    }
    // Parse the element with its node on top of the node stack.
    Value& value = currentValue()[index++];
    nodes_.push(&value);
    bool ok = readValue();
    nodes_.pop();
    if (!ok) // error already set
      return recoverFromError(tokenArrayEnd);

    Token currentToken;
    // Accept Comment after last item in the array.
    ok = readTokenSkippingComments(currentToken);
    bool badTokenType = (currentToken.type_ != tokenArraySeparator &&
                         currentToken.type_ != tokenArrayEnd);
    if (!ok || badTokenType) {
      return addErrorAndRecover("Missing ',' or ']' in array declaration",
                                currentToken, tokenArrayEnd);
    }
    if (currentToken.type_ == tokenArrayEnd)
      break;
  }
  return true;
}
1530 
1531 bool OurReader::decodeNumber(Token& token) {
1532  Value decoded;
1533  if (!decodeNumber(token, decoded))
1534  return false;
1535  currentValue().swapPayload(decoded);
1536  currentValue().setOffsetStart(token.start_ - begin_);
1537  currentValue().setOffsetLimit(token.end_ - begin_);
1538  return true;
1539 }
1540 
// Decodes the number text in [token.start_, token.end_) into `decoded`.
//
// The text is first parsed as an integer. As soon as a non-digit character is
// met, or the value would not fit in the largest integer type, decoding is
// handed over to decodeDouble(). Returns true on success; a failure is
// reported by decodeDouble().
bool OurReader::decodeNumber(Token& token, Value& decoded) {
  // Attempts to parse the number as an integer. If the number is
  // larger than the maximum supported value of an integer then
  // we decode the number as a double.
  Location current = token.start_;
  const bool isNegative = *current == '-';
  if (isNegative) {
    ++current;
  }

  // We assume we can represent the largest and smallest integer types as
  // unsigned integers with separate sign. This is only true if they can fit
  // into an unsigned integer.
  static_assert(Value::maxLargestInt <= Value::maxLargestUInt,
                "Int must be smaller than UInt");

  // We need to convert minLargestInt into a positive number. The easiest way
  // to do this conversion is to assume our "threshold" value of minLargestInt
  // divided by 10 can fit in maxLargestInt when absolute valued. This should
  // be a safe assumption.
  static_assert(Value::minLargestInt <= -Value::maxLargestInt,
                "The absolute value of minLargestInt must be greater than or "
                "equal to maxLargestInt");
  static_assert(Value::minLargestInt / 10 >= -Value::maxLargestInt,
                "The absolute value of minLargestInt must be only 1 magnitude "
                "larger than maxLargest Int");

  static constexpr Value::LargestUInt positive_threshold =
      Value::maxLargestUInt / 10;
  static constexpr Value::UInt positive_last_digit = Value::maxLargestUInt % 10;

  // For the negative values, we have to be more careful. Since typically
  // -Value::minLargestInt will cause an overflow, we first divide by 10 and
  // then take the inverse. This assumes that minLargestInt is only a single
  // power of 10 different in magnitude, which we check above. For the last
  // digit, we take the modulus before negating for the same reason.
  static constexpr auto negative_threshold =
      Value::LargestUInt(-(Value::minLargestInt / 10));
  static constexpr auto negative_last_digit =
      Value::UInt(-(Value::minLargestInt % 10));

  // Pick the limits that apply to the sign of this number.
  const Value::LargestUInt threshold =
      isNegative ? negative_threshold : positive_threshold;
  const Value::UInt max_last_digit =
      isNegative ? negative_last_digit : positive_last_digit;

  // The magnitude is accumulated as an unsigned value; the sign is applied at
  // the end.
  Value::LargestUInt value = 0;
  while (current < token.end_) {
    Char c = *current++;
    // Anything other than a digit (including a leading '+', '.', or an
    // exponent) means this is not a plain integer.
    if (c < '0' || c > '9')
      return decodeDouble(token, decoded);

    const auto digit(static_cast<Value::UInt>(c - '0'));
    if (value >= threshold) {
      // We've hit or exceeded the max value divided by 10 (rounded down).
      // Appending another digit is only safe when ALL of the following hold:
      //  a) we've only just touched the limit, meaning value == threshold,
      //  b) this is the last digit, and
      //  c) it's small enough to fit in that rounding delta.
      // Otherwise treat this number as a double to avoid overflow.
      if (value > threshold || current != token.end_ ||
          digit > max_last_digit) {
        return decodeDouble(token, decoded);
      }
    }
    value = value * 10 + digit;
  }

  if (isNegative) {
    // We use the same magnitude assumption here, just in case.
    const auto last_digit = static_cast<Value::UInt>(value % 10);
    decoded = -Value::LargestInt(value / 10) * 10 - last_digit;
  } else if (value <= Value::LargestUInt(Value::maxLargestInt)) {
    // Fits in the signed type: store as a signed integer.
    decoded = Value::LargestInt(value);
  } else {
    // Only representable as an unsigned integer.
    decoded = value;
  }

  return true;
}
1620 
1621 bool OurReader::decodeDouble(Token& token) {
1622  Value decoded;
1623  if (!decodeDouble(token, decoded))
1624  return false;
1625  currentValue().swapPayload(decoded);
1626  currentValue().setOffsetStart(token.start_ - begin_);
1627  currentValue().setOffsetLimit(token.end_ - begin_);
1628  return true;
1629 }
1630 
1631 bool OurReader::decodeDouble(Token& token, Value& decoded) {
1632  double value = 0;
1633  IStringStream is(String(token.start_, token.end_));
1634  is.imbue(std::locale::classic());
1635  if (!(is >> value)) {
1636  if (value == std::numeric_limits<double>::max())
1637  value = std::numeric_limits<double>::infinity();
1638  else if (value == std::numeric_limits<double>::lowest())
1639  value = -std::numeric_limits<double>::infinity();
1640  else if (!std::isinf(value))
1641  return addError(
1642  "'" + String(token.start_, token.end_) + "' is not a number.", token);
1643  }
1644  decoded = value;
1645  return true;
1646 }
1647 
1648 bool OurReader::decodeString(Token& token) {
1649  String decoded_string;
1650  if (!decodeString(token, decoded_string))
1651  return false;
1652  Value decoded(decoded_string);
1653  currentValue().swapPayload(decoded);
1654  currentValue().setOffsetStart(token.start_ - begin_);
1655  currentValue().setOffsetLimit(token.end_ - begin_);
1656  return true;
1657 }
1658 
// Decodes the quoted string token [token.start_, token.end_) into `decoded`,
// resolving escape sequences.
//
// The first and last characters of the token are taken to be the surrounding
// quotes and are not copied. Returns false, after recording an error, on an
// empty or unknown escape sequence, a bad \u sequence, or a raw character
// below 0x20.
bool OurReader::decodeString(Token& token, String& decoded) {
  decoded.reserve(static_cast<size_t>(token.end_ - token.start_ - 2));
  Location current = token.start_ + 1; // skip '"'
  Location end = token.end_ - 1;       // do not include '"'
  while (current != end) {
    Char c = *current++;
    // An unescaped double quote ends decoding early.
    if (c == '"')
      break;
    if (c == '\\') {
      if (current == end)
        return addError("Empty escape sequence in string", token, current);
      Char escape = *current++;
      switch (escape) {
      case '"':
        decoded += '"';
        break;
      case '/':
        decoded += '/';
        break;
      case '\\':
        decoded += '\\';
        break;
      case 'b':
        decoded += '\b';
        break;
      case 'f':
        decoded += '\f';
        break;
      case 'n':
        decoded += '\n';
        break;
      case 'r':
        decoded += '\r';
        break;
      case 't':
        decoded += '\t';
        break;
      case 'u': {
        // \uXXXX (possibly a surrogate pair) is appended as UTF-8.
        unsigned int unicode;
        if (!decodeUnicodeCodePoint(token, current, end, unicode))
          return false;
        decoded += codePointToUTF8(unicode);
      } break;
      default:
        return addError("Bad escape sequence in string", token, current);
      }
    } else {
      // Raw characters below 0x20 are rejected.
      if (static_cast<unsigned char>(c) < 0x20)
        return addError("Control character in string", token, current - 1);
      decoded += c;
    }
  }
  return true;
}
1713 
1714 bool OurReader::decodeUnicodeCodePoint(Token& token, Location& current,
1715  Location end, unsigned int& unicode) {
1716 
1717  if (!decodeUnicodeEscapeSequence(token, current, end, unicode))
1718  return false;
1719  if (unicode >= 0xD800 && unicode <= 0xDBFF) {
1720  // surrogate pairs
1721  if (end - current < 6)
1722  return addError(
1723  "additional six characters expected to parse unicode surrogate pair.",
1724  token, current);
1725  if (*(current++) == '\\' && *(current++) == 'u') {
1726  unsigned int surrogatePair;
1727  if (decodeUnicodeEscapeSequence(token, current, end, surrogatePair)) {
1728  unicode = 0x10000 + ((unicode & 0x3FF) << 10) + (surrogatePair & 0x3FF);
1729  } else
1730  return false;
1731  } else
1732  return addError("expecting another \\u token to begin the second half of "
1733  "a unicode surrogate pair",
1734  token, current);
1735  }
1736  return true;
1737 }
1738 
1739 bool OurReader::decodeUnicodeEscapeSequence(Token& token, Location& current,
1740  Location end,
1741  unsigned int& ret_unicode) {
1742  if (end - current < 4)
1743  return addError(
1744  "Bad unicode escape sequence in string: four digits expected.", token,
1745  current);
1746  int unicode = 0;
1747  for (int index = 0; index < 4; ++index) {
1748  Char c = *current++;
1749  unicode *= 16;
1750  if (c >= '0' && c <= '9')
1751  unicode += c - '0';
1752  else if (c >= 'a' && c <= 'f')
1753  unicode += c - 'a' + 10;
1754  else if (c >= 'A' && c <= 'F')
1755  unicode += c - 'A' + 10;
1756  else
1757  return addError(
1758  "Bad unicode escape sequence in string: hexadecimal digit expected.",
1759  token, current);
1760  }
1761  ret_unicode = static_cast<unsigned int>(unicode);
1762  return true;
1763 }
1764 
1765 bool OurReader::addError(const String& message, Token& token, Location extra) {
1766  ErrorInfo info;
1767  info.token_ = token;
1768  info.message_ = message;
1769  info.extra_ = extra;
1770  errors_.push_back(info);
1771  return false;
1772 }
1773 
1774 bool OurReader::recoverFromError(TokenType skipUntilToken) {
1775  size_t errorCount = errors_.size();
1776  Token skip;
1777  for (;;) {
1778  if (!readToken(skip))
1779  errors_.resize(errorCount); // discard errors caused by recovery
1780  if (skip.type_ == skipUntilToken || skip.type_ == tokenEndOfStream)
1781  break;
1782  }
1783  errors_.resize(errorCount);
1784  return false;
1785 }
1786 
// Records `message` for `token`, then skips input up to the next token of type
// `skipUntilToken` (or the end of the input). Returns the result of
// recoverFromError().
bool OurReader::addErrorAndRecover(const String& message, Token& token,
                                   TokenType skipUntilToken) {
  addError(message, token);
  return recoverFromError(skipUntilToken);
}
1792 
// Returns the value on top of the node stack, i.e. the one being filled in.
Value& OurReader::currentValue() { return *(nodes_.top()); }
1794 
1795 OurReader::Char OurReader::getNextChar() {
1796  if (current_ == end_)
1797  return 0;
1798  return *current_++;
1799 }
1800 
1801 void OurReader::getLocationLineAndColumn(Location location, int& line,
1802  int& column) const {
1803  Location current = begin_;
1804  Location lastLineStart = current;
1805  line = 0;
1806  while (current < location && current != end_) {
1807  Char c = *current++;
1808  if (c == '\r') {
1809  if (current != end_ && *current == '\n')
1810  ++current;
1811  lastLineStart = current;
1812  ++line;
1813  } else if (c == '\n') {
1814  lastLineStart = current;
1815  ++line;
1816  }
1817  }
1818  // column & line start at 1
1819  column = int(location - lastLineStart) + 1;
1820  ++line;
1821 }
1822 
1823 String OurReader::getLocationLineAndColumn(Location location) const {
1824  int line, column;
1825  getLocationLineAndColumn(location, line, column);
1826  char buffer[18 + 16 + 16 + 1];
1827  jsoncpp_snprintf(buffer, sizeof(buffer), "Line %d, Column %d", line, column);
1828  return buffer;
1829 }
1830 
1831 String OurReader::getFormattedErrorMessages() const {
1832  String formattedMessage;
1833  for (const auto& error : errors_) {
1834  formattedMessage +=
1835  "* " + getLocationLineAndColumn(error.token_.start_) + "\n";
1836  formattedMessage += " " + error.message_ + "\n";
1837  if (error.extra_)
1838  formattedMessage +=
1839  "See " + getLocationLineAndColumn(error.extra_) + " for detail.\n";
1840  }
1841  return formattedMessage;
1842 }
1843 
1844 std::vector<CharReader::StructuredError>
1845 OurReader::getStructuredErrors() const {
1846  std::vector<CharReader::StructuredError> allErrors;
1847  for (const auto& error : errors_) {
1848  CharReader::StructuredError structured;
1849  structured.offset_start = error.token_.start_ - begin_;
1850  structured.offset_limit = error.token_.end_ - begin_;
1851  structured.message = error.message_;
1852  allErrors.push_back(structured);
1853  }
1854  return allErrors;
1855 }
1856 
1857 class OurCharReader : public CharReader {
1858 
1859 public:
1860  OurCharReader(bool collectComments, OurFeatures const& features)
1861  : CharReader(
1862  std::unique_ptr<OurImpl>(new OurImpl(collectComments, features))) {}
1863 
1864 protected:
1865  class OurImpl : public Impl {
1866  public:
1867  OurImpl(bool collectComments, OurFeatures const& features)
1868  : collectComments_(collectComments), reader_(features) {}
1869 
1870  bool parse(char const* beginDoc, char const* endDoc, Value* root,
1871  String* errs) override {
1872  bool ok = reader_.parse(beginDoc, endDoc, *root, collectComments_);
1873  if (errs) {
1874  *errs = reader_.getFormattedErrorMessages();
1875  }
1876  return ok;
1877  }
1878 
1879  std::vector<CharReader::StructuredError>
1880  getStructuredErrors() const override {
1881  return reader_.getStructuredErrors();
1882  }
1883 
1884  private:
1885  bool const collectComments_;
1886  OurReader reader_;
1887  };
1888 };
1889 
// Creates a builder whose settings are initialized by setDefaults().
CharReaderBuilder::CharReaderBuilder() { setDefaults(&settings_); }
// Nothing beyond member destruction is required.
CharReaderBuilder::~CharReaderBuilder() = default;
1892 CharReader* CharReaderBuilder::newCharReader() const {
1893  bool collectComments = settings_["collectComments"].asBool();
1894  OurFeatures features = OurFeatures::all();
1895  features.allowComments_ = settings_["allowComments"].asBool();
1896  features.allowTrailingCommas_ = settings_["allowTrailingCommas"].asBool();
1897  features.strictRoot_ = settings_["strictRoot"].asBool();
1898  features.allowDroppedNullPlaceholders_ =
1899  settings_["allowDroppedNullPlaceholders"].asBool();
1900  features.allowNumericKeys_ = settings_["allowNumericKeys"].asBool();
1901  features.allowSingleQuotes_ = settings_["allowSingleQuotes"].asBool();
1902 
1903  // Stack limit is always a size_t, so we get this as an unsigned int
1904  // regardless of it we have 64-bit integer support enabled.
1905  features.stackLimit_ = static_cast<size_t>(settings_["stackLimit"].asUInt());
1906  features.failIfExtra_ = settings_["failIfExtra"].asBool();
1907  features.rejectDupKeys_ = settings_["rejectDupKeys"].asBool();
1908  features.allowSpecialFloats_ = settings_["allowSpecialFloats"].asBool();
1909  features.skipBom_ = settings_["skipBom"].asBool();
1910  return new OurCharReader(collectComments, features);
1911 }
1912 
1913 bool CharReaderBuilder::validate(Json::Value* invalid) const {
1914  static const auto& valid_keys = *new std::set<String>{
1915  "collectComments",
1916  "allowComments",
1917  "allowTrailingCommas",
1918  "strictRoot",
1919  "allowDroppedNullPlaceholders",
1920  "allowNumericKeys",
1921  "allowSingleQuotes",
1922  "stackLimit",
1923  "failIfExtra",
1924  "rejectDupKeys",
1925  "allowSpecialFloats",
1926  "skipBom",
1927  };
1928  for (auto si = settings_.begin(); si != settings_.end(); ++si) {
1929  auto key = si.name();
1930  if (valid_keys.count(key))
1931  continue;
1932  if (invalid)
1933  (*invalid)[key] = *si;
1934  else
1935  return false;
1936  }
1937  return invalid ? invalid->empty() : true;
1938 }
1939 
// Gives read/write access to the setting stored under `key`.
Value& CharReaderBuilder::operator[](const String& key) {
  return settings_[key];
}
1943 // static
1944 void CharReaderBuilder::strictMode(Json::Value* settings) {
1946  (*settings)["allowComments"] = false;
1947  (*settings)["allowTrailingCommas"] = false;
1948  (*settings)["strictRoot"] = true;
1949  (*settings)["allowDroppedNullPlaceholders"] = false;
1950  (*settings)["allowNumericKeys"] = false;
1951  (*settings)["allowSingleQuotes"] = false;
1952  (*settings)["stackLimit"] = 256;
1953  (*settings)["failIfExtra"] = true;
1954  (*settings)["rejectDupKeys"] = true;
1955  (*settings)["allowSpecialFloats"] = false;
1956  (*settings)["skipBom"] = true;
1958 }
1959 // static
1960 void CharReaderBuilder::setDefaults(Json::Value* settings) {
1962  (*settings)["collectComments"] = true;
1963  (*settings)["allowComments"] = true;
1964  (*settings)["allowTrailingCommas"] = true;
1965  (*settings)["strictRoot"] = false;
1966  (*settings)["allowDroppedNullPlaceholders"] = false;
1967  (*settings)["allowNumericKeys"] = false;
1968  (*settings)["allowSingleQuotes"] = false;
1969  (*settings)["stackLimit"] = 256;
1970  (*settings)["failIfExtra"] = false;
1971  (*settings)["rejectDupKeys"] = false;
1972  (*settings)["allowSpecialFloats"] = false;
1973  (*settings)["skipBom"] = true;
1975 }
1976 // static
1977 void CharReaderBuilder::ecma404Mode(Json::Value* settings) {
1979  (*settings)["allowComments"] = false;
1980  (*settings)["allowTrailingCommas"] = false;
1981  (*settings)["strictRoot"] = false;
1982  (*settings)["allowDroppedNullPlaceholders"] = false;
1983  (*settings)["allowNumericKeys"] = false;
1984  (*settings)["allowSingleQuotes"] = false;
1985  (*settings)["stackLimit"] = 256;
1986  (*settings)["failIfExtra"] = true;
1987  (*settings)["rejectDupKeys"] = false;
1988  (*settings)["allowSpecialFloats"] = false;
1989  (*settings)["skipBom"] = false;
1991 }
1992 
// Returns the structured errors reported by the implementation object.
std::vector<CharReader::StructuredError>
CharReader::getStructuredErrors() const {
  return _impl->getStructuredErrors();
}
1997 
// Forwards the parse request for [beginDoc, endDoc) to the implementation
// object and returns its result.
bool CharReader::parse(char const* beginDoc, char const* endDoc, Value* root,
                       String* errs) {
  return _impl->parse(beginDoc, endDoc, root, errs);
}
2002 
2004 // global functions
2005 
2006 bool parseFromStream(CharReader::Factory const& fact, IStream& sin, Value* root,
2007  String* errs) {
2008  OStringStream ssin;
2009  ssin << sin.rdbuf();
2010  String doc = std::move(ssin).str();
2011  char const* begin = doc.data();
2012  char const* end = begin + doc.size();
2013  // Note that we do not actually need a null-terminator.
2014  CharReaderPtr const reader(fact.newCharReader());
2015  return reader->parse(begin, end, root, errs);
2016 }
2017 
2020  String errs;
2021  bool ok = parseFromStream(b, sin, &root, &errs);
2022  if (!ok) {
2023  throwRuntimeError(errs);
2024  }
2025  return sin;
2026 }
2027 
2028 } // namespace Json
static String codePointToUTF8(unsigned int cp)
Converts a unicode code-point to UTF-8.
Definition: json_tool.h:39
static constexpr LargestInt minLargestInt
Minimum signed integer value that can be stored in a Json::Value.
Definition: value.h:238
#define JSON_USE_EXCEPTION
Definition: config.h:20
#define JSON_API
If defined, indicates that the source file is amalgamated to prevent private header inclusion...
Definition: config.h:50
array value (ordered list)
Definition: value.h:128
std::basic_istringstream< String::value_type, String::traits_type, String::allocator_type > IStringStream
Definition: config.h:138
Json::LargestUInt LargestUInt
Definition: value.h:222
void setComment(const char *comment, size_t len, CommentPlacement placement)
Comments must be //... or /* ... */.
Definition: value.h:668
object value (collection of name/value pairs).
Definition: value.h:129
String getFormattedErrorMessages() const
Returns a user-friendly string that lists the errors in the parsed document.
void swapPayload(Value &other)
Swap values but leave comments and source offsets in place.
Definition: json_value.cpp:501
STL namespace.
static constexpr Int maxInt
Maximum signed int value that can be stored in a Json::Value.
Definition: value.h:248
bool empty() const
Return true if empty array, empty object, or null; otherwise, false.
Definition: json_value.cpp:932
char Char
Definition: reader.h:39
std::basic_string< char, std::char_traits< char >, Allocator< char >> String
Definition: config.h:135
Features()
Initialize the configuration like JsonConfig::allFeatures.
An error tagged with where in the JSON text it was encountered.
Definition: reader.h:47
std::unique_ptr< CharReader > CharReaderPtr
Definition: json_reader.cpp:50
std::vector< StructuredError > getStructuredErrors() const
Returns a vector of structured errors encountered while parsing.
Int64 LargestInt
Definition: config.h:124
bool isObject() const
static constexpr LargestUInt maxLargestUInt
Maximum unsigned integer value that can be stored in a Json::Value.
Definition: value.h:243
IStream & operator>>(IStream &, Value &)
Read from 'sin' into 'root'.
size_t & newlineScanByteCountForTesting()
bool allowComments_
true if comments are allowed. Default: true.
Definition: json_features.h:45
CommentPlacement
Definition: value.h:132
std::basic_ostringstream< String::value_type, String::traits_type, String::allocator_type > OStringStream
Definition: config.h:141
bool allowNumericKeys_
true if numeric object keys are allowed. Default: false.
Definition: json_features.h:55
static size_t const stackLimit_g
Definition: json_reader.cpp:45
bool good() const
Return whether there are any errors.
bool parse(const std::string &document, Value &root, bool collectComments=true)
Read a Value from a JSON document.
Definition: json_reader.cpp:82
JSON (JavaScript Object Notation).
Definition: allocator.h:16
bool allowDroppedNullPlaceholders_
true if dropped null placeholders are allowed. Default: false.
Definition: json_features.h:52
#define jsoncpp_snprintf
Definition: config.h:63
bool pushError(const Value &value, const String &message)
Add a semantic error message.
static constexpr LargestInt maxLargestInt
Maximum signed integer value that can be stored in a Json::Value.
Definition: value.h:241
Interface for reading JSON from a char array.
Definition: reader.h:248
Json::LargestInt LargestInt
Definition: value.h:221
unsigned int UInt
Definition: config.h:110
Represents a JSON value.
Definition: value.h:207
void setOffsetStart(ptrdiff_t start)
ptrdiff_t getOffsetStart() const
static Features all()
A configuration that allows all features and assumes all strings are UTF-8.
Definition: json_reader.cpp:57
a comment on the line after a value (only makes sense for the root value)
Definition: value.h:135
std::istream IStream
Definition: config.h:142
ptrdiff_t getOffsetLimit() const
#define JSONCPP_DEPRECATED_STACK_LIMIT
Definition: json_reader.cpp:42
bool parseFromStream(CharReader::Factory const &, IStream &, Value *root, String *errs)
Consume entire stream and use its begin/end.
void setOffsetLimit(ptrdiff_t limit)
static Features strictMode()
A configuration that is strictly compatible with the JSON specification.
Definition: json_reader.cpp:59
bool strictRoot_
true if root must be either an array or an object value.
Definition: json_features.h:49
bool isArray() const
UInt64 LargestUInt
Definition: config.h:125
Build a CharReader implementation.
Definition: reader.h:317
Configuration passed to reader and writer.
Definition: json_features.h:22
a comment placed on the line before a value
Definition: value.h:133
Reader()
Constructs a Reader allowing all features for parsing.
Definition: json_reader.cpp:78
a comment just after a value on the same line
Definition: value.h:134
const Char * Location
Definition: reader.h:40
virtual CharReader * newCharReader() const =0
Allocate a CharReader via operator new().