/home/arjun/llvm-project/llvm/lib/Support/YAMLParser.cpp
| Line | Count | Source (jump to first uncovered line) | 
| 1 |  | //===- YAMLParser.cpp - Simple YAML parser --------------------------------===// | 
| 2 |  | // | 
| 3 |  | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | 
| 4 |  | // See https://llvm.org/LICENSE.txt for license information. | 
| 5 |  | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | 
| 6 |  | // | 
| 7 |  | //===----------------------------------------------------------------------===// | 
| 8 |  | // | 
| 9 |  | //  This file implements a YAML parser. | 
| 10 |  | // | 
| 11 |  | //===----------------------------------------------------------------------===// | 
| 12 |  |  | 
| 13 |  | #include "llvm/Support/YAMLParser.h" | 
| 14 |  | #include "llvm/ADT/AllocatorList.h" | 
| 15 |  | #include "llvm/ADT/ArrayRef.h" | 
| 16 |  | #include "llvm/ADT/None.h" | 
| 17 |  | #include "llvm/ADT/STLExtras.h" | 
| 18 |  | #include "llvm/ADT/SmallString.h" | 
| 19 |  | #include "llvm/ADT/SmallVector.h" | 
| 20 |  | #include "llvm/ADT/StringExtras.h" | 
| 21 |  | #include "llvm/ADT/StringRef.h" | 
| 22 |  | #include "llvm/ADT/Twine.h" | 
| 23 |  | #include "llvm/Support/Compiler.h" | 
| 24 |  | #include "llvm/Support/ErrorHandling.h" | 
| 25 |  | #include "llvm/Support/MemoryBuffer.h" | 
| 26 |  | #include "llvm/Support/SMLoc.h" | 
| 27 |  | #include "llvm/Support/SourceMgr.h" | 
| 28 |  | #include "llvm/Support/Unicode.h" | 
| 29 |  | #include "llvm/Support/raw_ostream.h" | 
| 30 |  | #include <algorithm> | 
| 31 |  | #include <cassert> | 
| 32 |  | #include <cstddef> | 
| 33 |  | #include <cstdint> | 
| 34 |  | #include <map> | 
| 35 |  | #include <memory> | 
| 36 |  | #include <string> | 
| 37 |  | #include <system_error> | 
| 38 |  | #include <utility> | 
| 39 |  |  | 
| 40 |  | using namespace llvm; | 
| 41 |  | using namespace yaml; | 
| 42 |  |  | 
| 43 |  | enum UnicodeEncodingForm { | 
| 44 |  |   UEF_UTF32_LE, ///< UTF-32 Little Endian | 
| 45 |  |   UEF_UTF32_BE, ///< UTF-32 Big Endian | 
| 46 |  |   UEF_UTF16_LE, ///< UTF-16 Little Endian | 
| 47 |  |   UEF_UTF16_BE, ///< UTF-16 Big Endian | 
| 48 |  |   UEF_UTF8,     ///< UTF-8 or ascii. | 
| 49 |  |   UEF_Unknown   ///< Not a valid Unicode encoding. | 
| 50 |  | }; | 
| 51 |  |  | 
| 52 |  | /// EncodingInfo - Holds the encoding type and length of the byte order mark if | 
| 53 |  | ///                it exists. Length is in {0, 2, 3, 4}. | 
| 54 |  | using EncodingInfo = std::pair<UnicodeEncodingForm, unsigned>; | 
| 55 |  |  | 
| 56 |  | /// getUnicodeEncoding - Reads up to the first 4 bytes to determine the Unicode | 
| 57 |  | ///                      encoding form of \a Input. | 
| 58 |  | /// | 
| 59 |  | /// @param Input A string of length 0 or more. | 
| 60 |  | /// @returns An EncodingInfo indicating the Unicode encoding form of the input | 
| 61 |  | ///          and how long the byte order mark is if one exists. | 
| 62 | 0 | static EncodingInfo getUnicodeEncoding(StringRef Input) { | 
| 63 | 0 |   if (Input.empty()) | 
| 64 | 0 |     return std::make_pair(UEF_Unknown, 0); | 
| 65 | 0 |  | 
| 66 | 0 |   switch (uint8_t(Input[0])) { | 
| 67 | 0 |   case 0x00: | 
| 68 | 0 |     if (Input.size() >= 4) { | 
| 69 | 0 |       if (  Input[1] == 0 | 
| 70 | 0 |          && uint8_t(Input[2]) == 0xFE | 
| 71 | 0 |          && uint8_t(Input[3]) == 0xFF) | 
| 72 | 0 |         return std::make_pair(UEF_UTF32_BE, 4); | 
| 73 | 0 |       if (Input[1] == 0 && Input[2] == 0 && Input[3] != 0) | 
| 74 | 0 |         return std::make_pair(UEF_UTF32_BE, 0); | 
| 75 | 0 |     } | 
| 76 | 0 |  | 
| 77 | 0 |     if (Input.size() >= 2 && Input[1] != 0) | 
| 78 | 0 |       return std::make_pair(UEF_UTF16_BE, 0); | 
| 79 | 0 |     return std::make_pair(UEF_Unknown, 0); | 
| 80 | 0 |   case 0xFF: | 
| 81 | 0 |     if (  Input.size() >= 4 | 
| 82 | 0 |        && uint8_t(Input[1]) == 0xFE | 
| 83 | 0 |        && Input[2] == 0 | 
| 84 | 0 |        && Input[3] == 0) | 
| 85 | 0 |       return std::make_pair(UEF_UTF32_LE, 4); | 
| 86 | 0 |  | 
| 87 | 0 |     if (Input.size() >= 2 && uint8_t(Input[1]) == 0xFE) | 
| 88 | 0 |       return std::make_pair(UEF_UTF16_LE, 2); | 
| 89 | 0 |     return std::make_pair(UEF_Unknown, 0); | 
| 90 | 0 |   case 0xFE: | 
| 91 | 0 |     if (Input.size() >= 2 && uint8_t(Input[1]) == 0xFF) | 
| 92 | 0 |       return std::make_pair(UEF_UTF16_BE, 2); | 
| 93 | 0 |     return std::make_pair(UEF_Unknown, 0); | 
| 94 | 0 |   case 0xEF: | 
| 95 | 0 |     if (  Input.size() >= 3 | 
| 96 | 0 |        && uint8_t(Input[1]) == 0xBB | 
| 97 | 0 |        && uint8_t(Input[2]) == 0xBF) | 
| 98 | 0 |       return std::make_pair(UEF_UTF8, 3); | 
| 99 | 0 |     return std::make_pair(UEF_Unknown, 0); | 
| 100 | 0 |   } | 
| 101 | 0 |  | 
| 102 | 0 |   // It could still be utf-32 or utf-16. | 
| 103 | 0 |   if (Input.size() >= 4 && Input[1] == 0 && Input[2] == 0 && Input[3] == 0) | 
| 104 | 0 |     return std::make_pair(UEF_UTF32_LE, 0); | 
| 105 | 0 |  | 
| 106 | 0 |   if (Input.size() >= 2 && Input[1] == 0) | 
| 107 | 0 |     return std::make_pair(UEF_UTF16_LE, 0); | 
| 108 | 0 |  | 
| 109 | 0 |   return std::make_pair(UEF_UTF8, 0); | 
| 110 | 0 | } | 
| 111 |  |  | 
| 112 |  | /// Pin the vtables to this file. | 
| 113 | 0 | void Node::anchor() {} | 
| 114 | 0 | void NullNode::anchor() {} | 
| 115 | 0 | void ScalarNode::anchor() {} | 
| 116 | 0 | void BlockScalarNode::anchor() {} | 
| 117 | 0 | void KeyValueNode::anchor() {} | 
| 118 | 0 | void MappingNode::anchor() {} | 
| 119 | 0 | void SequenceNode::anchor() {} | 
| 120 | 0 | void AliasNode::anchor() {} | 
| 121 |  |  | 
| 122 |  | namespace llvm { | 
| 123 |  | namespace yaml { | 
| 124 |  |  | 
| 125 |  | /// Token - A single YAML token. | 
| 126 |  | struct Token { | 
| 127 |  |   enum TokenKind { | 
| 128 |  |     TK_Error, // Uninitialized token. | 
| 129 |  |     TK_StreamStart, | 
| 130 |  |     TK_StreamEnd, | 
| 131 |  |     TK_VersionDirective, | 
| 132 |  |     TK_TagDirective, | 
| 133 |  |     TK_DocumentStart, | 
| 134 |  |     TK_DocumentEnd, | 
| 135 |  |     TK_BlockEntry, | 
| 136 |  |     TK_BlockEnd, | 
| 137 |  |     TK_BlockSequenceStart, | 
| 138 |  |     TK_BlockMappingStart, | 
| 139 |  |     TK_FlowEntry, | 
| 140 |  |     TK_FlowSequenceStart, | 
| 141 |  |     TK_FlowSequenceEnd, | 
| 142 |  |     TK_FlowMappingStart, | 
| 143 |  |     TK_FlowMappingEnd, | 
| 144 |  |     TK_Key, | 
| 145 |  |     TK_Value, | 
| 146 |  |     TK_Scalar, | 
| 147 |  |     TK_BlockScalar, | 
| 148 |  |     TK_Alias, | 
| 149 |  |     TK_Anchor, | 
| 150 |  |     TK_Tag | 
| 151 |  |   } Kind = TK_Error; | 
| 152 |  |  | 
| 153 |  |   /// A string of length 0 or more whose begin() points to the logical location | 
| 154 |  |   /// of the token in the input. | 
| 155 |  |   StringRef Range; | 
| 156 |  |  | 
| 157 |  |   /// The value of a block scalar node. | 
| 158 |  |   std::string Value; | 
| 159 |  |  | 
| 160 | 0 |   Token() = default; | 
| 161 |  | }; | 
| 162 |  |  | 
| 163 |  | } // end namespace yaml | 
| 164 |  | } // end namespace llvm | 
| 165 |  |  | 
| 166 |  | using TokenQueueT = BumpPtrList<Token>; | 
| 167 |  |  | 
| 168 |  | namespace { | 
| 169 |  |  | 
| 170 |  | /// This struct is used to track simple keys. | 
| 171 |  | /// | 
| 172 |  | /// Simple keys are handled by creating an entry in SimpleKeys for each Token | 
| 173 |  | /// which could legally be the start of a simple key. When peekNext is called, | 
| 174 |  | /// if the Token to be returned is referenced by a SimpleKey, we continue | 
| 175 |  | /// tokenizing until that potential simple key has either been found to not be | 
| 176 |  | /// a simple key (we moved on to the next line or went further than 1024 chars), | 
| 177 |  | /// or we run into a Value, in which case we insert a Key token (and possibly | 
| 178 |  | /// others) before the SimpleKey's Tok. | 
| 179 |  | struct SimpleKey { | 
| 180 |  |   TokenQueueT::iterator Tok; | 
| 181 |  |   unsigned Column = 0; | 
| 182 |  |   unsigned Line = 0; | 
| 183 |  |   unsigned FlowLevel = 0; | 
| 184 |  |   bool IsRequired = false; | 
| 185 |  |  | 
| 186 | 0 |   bool operator ==(const SimpleKey &Other) { | 
| 187 | 0 |     return Tok == Other.Tok; | 
| 188 | 0 |   } | 
| 189 |  | }; | 
| 190 |  |  | 
| 191 |  | } // end anonymous namespace | 
| 192 |  |  | 
| 193 |  | /// The Unicode scalar value of a UTF-8 minimal well-formed code unit | 
| 194 |  | ///        subsequence and the subsequence's length in code units (uint8_t). | 
| 195 |  | ///        A length of 0 represents an error. | 
| 196 |  | using UTF8Decoded = std::pair<uint32_t, unsigned>; | 
| 197 |  |  | 
| 198 | 0 | static UTF8Decoded decodeUTF8(StringRef Range) { | 
| 199 | 0 |   StringRef::iterator Position= Range.begin(); | 
| 200 | 0 |   StringRef::iterator End = Range.end(); | 
| 201 | 0 |   // 1 byte: [0x00, 0x7f] | 
| 202 | 0 |   // Bit pattern: 0xxxxxxx | 
| 203 | 0 |   if ((*Position & 0x80) == 0) { | 
| 204 | 0 |      return std::make_pair(*Position, 1); | 
| 205 | 0 |   } | 
| 206 | 0 |   // 2 bytes: [0x80, 0x7ff] | 
| 207 | 0 |   // Bit pattern: 110xxxxx 10xxxxxx | 
| 208 | 0 |   if (Position + 1 != End && | 
| 209 | 0 |       ((*Position & 0xE0) == 0xC0) && | 
| 210 | 0 |       ((*(Position + 1) & 0xC0) == 0x80)) { | 
| 211 | 0 |     uint32_t codepoint = ((*Position & 0x1F) << 6) | | 
| 212 | 0 |                           (*(Position + 1) & 0x3F); | 
| 213 | 0 |     if (codepoint >= 0x80) | 
| 214 | 0 |       return std::make_pair(codepoint, 2); | 
| 215 | 0 |   } | 
| 216 | 0 |   // 3 bytes: [0x800, 0xffff] | 
| 217 | 0 |   // Bit pattern: 1110xxxx 10xxxxxx 10xxxxxx | 
| 218 | 0 |   if (Position + 2 != End && | 
| 219 | 0 |       ((*Position & 0xF0) == 0xE0) && | 
| 220 | 0 |       ((*(Position + 1) & 0xC0) == 0x80) && | 
| 221 | 0 |       ((*(Position + 2) & 0xC0) == 0x80)) { | 
| 222 | 0 |     uint32_t codepoint = ((*Position & 0x0F) << 12) | | 
| 223 | 0 |                          ((*(Position + 1) & 0x3F) << 6) | | 
| 224 | 0 |                           (*(Position + 2) & 0x3F); | 
| 225 | 0 |     // Codepoints between 0xD800 and 0xDFFF are invalid, as | 
| 226 | 0 |     // they are high / low surrogate halves used by UTF-16. | 
| 227 | 0 |     if (codepoint >= 0x800 && | 
| 228 | 0 |         (codepoint < 0xD800 || codepoint > 0xDFFF)) | 
| 229 | 0 |       return std::make_pair(codepoint, 3); | 
| 230 | 0 |   } | 
| 231 | 0 |   // 4 bytes: [0x10000, 0x10FFFF] | 
| 232 | 0 |   // Bit pattern: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx | 
| 233 | 0 |   if (Position + 3 != End && | 
| 234 | 0 |       ((*Position & 0xF8) == 0xF0) && | 
| 235 | 0 |       ((*(Position + 1) & 0xC0) == 0x80) && | 
| 236 | 0 |       ((*(Position + 2) & 0xC0) == 0x80) && | 
| 237 | 0 |       ((*(Position + 3) & 0xC0) == 0x80)) { | 
| 238 | 0 |     uint32_t codepoint = ((*Position & 0x07) << 18) | | 
| 239 | 0 |                          ((*(Position + 1) & 0x3F) << 12) | | 
| 240 | 0 |                          ((*(Position + 2) & 0x3F) << 6) | | 
| 241 | 0 |                           (*(Position + 3) & 0x3F); | 
| 242 | 0 |     if (codepoint >= 0x10000 && codepoint <= 0x10FFFF) | 
| 243 | 0 |       return std::make_pair(codepoint, 4); | 
| 244 | 0 |   } | 
| 245 | 0 |   return std::make_pair(0, 0); | 
| 246 | 0 | } | 
| 247 |  |  | 
| 248 |  | namespace llvm { | 
| 249 |  | namespace yaml { | 
| 250 |  |  | 
| 251 |  | /// Scans YAML tokens from a MemoryBuffer. | 
| 252 |  | class Scanner { | 
| 253 |  | public: | 
| 254 |  |   Scanner(StringRef Input, SourceMgr &SM, bool ShowColors = true, | 
| 255 |  |           std::error_code *EC = nullptr); | 
| 256 |  |   Scanner(MemoryBufferRef Buffer, SourceMgr &SM_, bool ShowColors = true, | 
| 257 |  |           std::error_code *EC = nullptr); | 
| 258 |  |  | 
| 259 |  |   /// Parse the next token and return it without popping it. | 
| 260 |  |   Token &peekNext(); | 
| 261 |  |  | 
| 262 |  |   /// Parse the next token and pop it from the queue. | 
| 263 |  |   Token getNext(); | 
| 264 |  |  | 
| 265 |  |   void printError(SMLoc Loc, SourceMgr::DiagKind Kind, const Twine &Message, | 
| 266 | 0 |                   ArrayRef<SMRange> Ranges = None) { | 
| 267 | 0 |     SM.PrintMessage(Loc, Kind, Message, Ranges, /* FixIts= */ None, ShowColors); | 
| 268 | 0 |   } | 
| 269 |  |  | 
| 270 | 0 |   void setError(const Twine &Message, StringRef::iterator Position) { | 
| 271 | 0 |     if (Position >= End) | 
| 272 | 0 |       Position = End - 1; | 
| 273 | 0 |  | 
| 274 | 0 |     // propagate the error if possible | 
| 275 | 0 |     if (EC) | 
| 276 | 0 |       *EC = make_error_code(std::errc::invalid_argument); | 
| 277 | 0 |  | 
| 278 | 0 |     // Don't print out more errors after the first one we encounter. The rest | 
| 279 | 0 |     // are just the result of the first, and have no meaning. | 
| 280 | 0 |     if (!Failed) | 
| 281 | 0 |       printError(SMLoc::getFromPointer(Position), SourceMgr::DK_Error, Message); | 
| 282 | 0 |     Failed = true; | 
| 283 | 0 |   } | 
| 284 |  |  | 
| 285 |  |   /// Returns true if an error occurred while parsing. | 
| 286 | 0 |   bool failed() { | 
| 287 | 0 |     return Failed; | 
| 288 | 0 |   } | 
| 289 |  |  | 
| 290 |  | private: | 
| 291 |  |   void init(MemoryBufferRef Buffer); | 
| 292 |  |  | 
| 293 | 0 |   StringRef currentInput() { | 
| 294 | 0 |     return StringRef(Current, End - Current); | 
| 295 | 0 |   } | 
| 296 |  |  | 
| 297 |  |   /// Decode a UTF-8 minimal well-formed code unit subsequence starting | 
| 298 |  |   ///        at \a Position. | 
| 299 |  |   /// | 
| 300 |  |   /// If the UTF-8 code units starting at Position do not form a well-formed | 
| 301 |  |   /// code unit subsequence, then the Unicode scalar value is 0, and the length | 
| 302 |  |   /// is 0. | 
| 303 | 0 |   UTF8Decoded decodeUTF8(StringRef::iterator Position) { | 
| 304 | 0 |     return ::decodeUTF8(StringRef(Position, End - Position)); | 
| 305 | 0 |   } | 
| 306 |  |  | 
| 307 |  |   // The following functions are based on the grammar rules in the YAML spec. The | 
| 308 |  |   // style of the function names is meant to closely match how they are written | 
| 309 |  |   // in the spec. The number within the [] is the number of the grammar rule in | 
| 310 |  |   // the spec. | 
| 311 |  |   // | 
| 312 |  |   // See 4.2 [Production Naming Conventions] for the meaning of the prefixes. | 
| 313 |  |   // | 
| 314 |  |   // c- | 
| 315 |  |   //   A production starting and ending with a special character. | 
| 316 |  |   // b- | 
| 317 |  |   //   A production matching a single line break. | 
| 318 |  |   // nb- | 
| 319 |  |   //   A production starting and ending with a non-break character. | 
| 320 |  |   // s- | 
| 321 |  |   //   A production starting and ending with a white space character. | 
| 322 |  |   // ns- | 
| 323 |  |   //   A production starting and ending with a non-space character. | 
| 324 |  |   // l- | 
| 325 |  |   //   A production matching complete line(s). | 
| 326 |  |  | 
| 327 |  |   /// Skip a single nb-char[27] starting at Position. | 
| 328 |  |   /// | 
| 329 |  |   /// A nb-char is 0x9 | [0x20-0x7E] | 0x85 | [0xA0-0xD7FF] | [0xE000-0xFEFE] | 
| 330 |  |   ///                  | [0xFF00-0xFFFD] | [0x10000-0x10FFFF] | 
| 331 |  |   /// | 
| 332 |  |   /// @returns The code unit after the nb-char, or Position if it's not an | 
| 333 |  |   ///          nb-char. | 
| 334 |  |   StringRef::iterator skip_nb_char(StringRef::iterator Position); | 
| 335 |  |  | 
| 336 |  |   /// Skip a single b-break[28] starting at Position. | 
| 337 |  |   /// | 
| 338 |  |   /// A b-break is 0xD 0xA | 0xD | 0xA | 
| 339 |  |   /// | 
| 340 |  |   /// @returns The code unit after the b-break, or Position if it's not a | 
| 341 |  |   ///          b-break. | 
| 342 |  |   StringRef::iterator skip_b_break(StringRef::iterator Position); | 
| 343 |  |  | 
| 344 |  |   /// Skip a single s-space[31] starting at Position. | 
| 345 |  |   /// | 
| 346 |  |   /// An s-space is 0x20 | 
| 347 |  |   /// | 
| 348 |  |   /// @returns The code unit after the s-space, or Position if it's not a | 
| 349 |  |   ///          s-space. | 
| 350 |  |   StringRef::iterator skip_s_space(StringRef::iterator Position); | 
| 351 |  |  | 
| 352 |  |   /// Skip a single s-white[33] starting at Position. | 
| 353 |  |   /// | 
| 354 |  |   /// A s-white is 0x20 | 0x9 | 
| 355 |  |   /// | 
| 356 |  |   /// @returns The code unit after the s-white, or Position if it's not a | 
| 357 |  |   ///          s-white. | 
| 358 |  |   StringRef::iterator skip_s_white(StringRef::iterator Position); | 
| 359 |  |  | 
| 360 |  |   /// Skip a single ns-char[34] starting at Position. | 
| 361 |  |   /// | 
| 362 |  |   /// A ns-char is nb-char - s-white | 
| 363 |  |   /// | 
| 364 |  |   /// @returns The code unit after the ns-char, or Position if it's not a | 
| 365 |  |   ///          ns-char. | 
| 366 |  |   StringRef::iterator skip_ns_char(StringRef::iterator Position); | 
| 367 |  |  | 
| 368 |  |   using SkipWhileFunc = StringRef::iterator (Scanner::*)(StringRef::iterator); | 
| 369 |  |  | 
| 370 |  |   /// Skip minimal well-formed code unit subsequences until Func | 
| 371 |  |   ///        returns its input. | 
| 372 |  |   /// | 
| 373 |  |   /// @returns The code unit after the last minimal well-formed code unit | 
| 374 |  |   ///          subsequence that Func accepted. | 
| 375 |  |   StringRef::iterator skip_while( SkipWhileFunc Func | 
| 376 |  |                                 , StringRef::iterator Position); | 
| 377 |  |  | 
| 378 |  |   /// Skip minimal well-formed code unit subsequences until Func returns its | 
| 379 |  |   /// input. | 
| 380 |  |   void advanceWhile(SkipWhileFunc Func); | 
| 381 |  |  | 
| 382 |  |   /// Scan ns-uri-char[39]s starting at Current. | 
| 383 |  |   /// | 
| 384 |  |   /// This updates Current and Column while scanning. | 
| 385 |  |   void scan_ns_uri_char(); | 
| 386 |  |  | 
| 387 |  |   /// Consume a minimal well-formed code unit subsequence starting at | 
| 388 |  |   ///        \a Current. Return false if it is not the same Unicode scalar value as | 
| 389 |  |   ///        \a Expected. This updates \a Column. | 
| 390 |  |   bool consume(uint32_t Expected); | 
| 391 |  |  | 
| 392 |  |   /// Skip \a Distance UTF-8 code units. Updates \a Current and \a Column. | 
| 393 |  |   void skip(uint32_t Distance); | 
| 394 |  |  | 
| 395 |  |   /// Return true if the minimal well-formed code unit subsequence at | 
| 396 |  |   ///        Position is whitespace or a new line. | 
| 397 |  |   bool isBlankOrBreak(StringRef::iterator Position); | 
| 398 |  |  | 
| 399 |  |   /// Consume a single b-break[28] if it's present at the current position. | 
| 400 |  |   /// | 
| 401 |  |   /// Return false if the code unit at the current position isn't a line break. | 
| 402 |  |   bool consumeLineBreakIfPresent(); | 
| 403 |  |  | 
| 404 |  |   /// If IsSimpleKeyAllowed, create and push_back a new SimpleKey. | 
| 405 |  |   void saveSimpleKeyCandidate( TokenQueueT::iterator Tok | 
| 406 |  |                              , unsigned AtColumn | 
| 407 |  |                              , bool IsRequired); | 
| 408 |  |  | 
| 409 |  |   /// Remove simple keys that can no longer be valid simple keys. | 
| 410 |  |   /// | 
| 411 |  |   /// Invalid simple keys are not on the current line or are further than 1024 | 
| 412 |  |   /// columns back. | 
| 413 |  |   void removeStaleSimpleKeyCandidates(); | 
| 414 |  |  | 
| 415 |  |   /// Remove all simple keys on FlowLevel \a Level. | 
| 416 |  |   void removeSimpleKeyCandidatesOnFlowLevel(unsigned Level); | 
| 417 |  |  | 
| 418 |  |   /// Unroll indentation in \a Indents back to \a ToColumn. Creates BlockEnd | 
| 419 |  |   ///        tokens if needed. | 
| 420 |  |   bool unrollIndent(int ToColumn); | 
| 421 |  |  | 
| 422 |  |   /// Increase indent to \a ToColumn. Creates \a Kind token at \a InsertPoint | 
| 423 |  |   ///        if needed. | 
| 424 |  |   bool rollIndent( int ToColumn | 
| 425 |  |                  , Token::TokenKind Kind | 
| 426 |  |                  , TokenQueueT::iterator InsertPoint); | 
| 427 |  |  | 
| 428 |  |   /// Skip a single-line comment when the comment starts at the current | 
| 429 |  |   /// position of the scanner. | 
| 430 |  |   void skipComment(); | 
| 431 |  |  | 
| 432 |  |   /// Skip whitespace and comments until the start of the next token. | 
| 433 |  |   void scanToNextToken(); | 
| 434 |  |  | 
| 435 |  |   /// Must be the first token generated. | 
| 436 |  |   bool scanStreamStart(); | 
| 437 |  |  | 
| 438 |  |   /// Generate tokens needed to close out the stream. | 
| 439 |  |   bool scanStreamEnd(); | 
| 440 |  |  | 
| 441 |  |   /// Scan a %BLAH directive. | 
| 442 |  |   bool scanDirective(); | 
| 443 |  |  | 
| 444 |  |   /// Scan a ... or ---. | 
| 445 |  |   bool scanDocumentIndicator(bool IsStart); | 
| 446 |  |  | 
| 447 |  |   /// Scan a [ or { and generate the proper flow collection start token. | 
| 448 |  |   bool scanFlowCollectionStart(bool IsSequence); | 
| 449 |  |  | 
| 450 |  |   /// Scan a ] or } and generate the proper flow collection end token. | 
| 451 |  |   bool scanFlowCollectionEnd(bool IsSequence); | 
| 452 |  |  | 
| 453 |  |   /// Scan the , that separates entries in a flow collection. | 
| 454 |  |   bool scanFlowEntry(); | 
| 455 |  |  | 
| 456 |  |   /// Scan the - that starts block sequence entries. | 
| 457 |  |   bool scanBlockEntry(); | 
| 458 |  |  | 
| 459 |  |   /// Scan an explicit ? indicating a key. | 
| 460 |  |   bool scanKey(); | 
| 461 |  |  | 
| 462 |  |   /// Scan an explicit : indicating a value. | 
| 463 |  |   bool scanValue(); | 
| 464 |  |  | 
| 465 |  |   /// Scan a quoted scalar. | 
| 466 |  |   bool scanFlowScalar(bool IsDoubleQuoted); | 
| 467 |  |  | 
| 468 |  |   /// Scan an unquoted scalar. | 
| 469 |  |   bool scanPlainScalar(); | 
| 470 |  |  | 
| 471 |  |   /// Scan an Alias or Anchor starting with * or &. | 
| 472 |  |   bool scanAliasOrAnchor(bool IsAlias); | 
| 473 |  |  | 
| 474 |  |   /// Scan a block scalar starting with | or >. | 
| 475 |  |   bool scanBlockScalar(bool IsLiteral); | 
| 476 |  |  | 
| 477 |  |   /// Scan a chomping indicator in a block scalar header. | 
| 478 |  |   char scanBlockChompingIndicator(); | 
| 479 |  |  | 
| 480 |  |   /// Scan an indentation indicator in a block scalar header. | 
| 481 |  |   unsigned scanBlockIndentationIndicator(); | 
| 482 |  |  | 
| 483 |  |   /// Scan a block scalar header. | 
| 484 |  |   /// | 
| 485 |  |   /// Return false if an error occurred. | 
| 486 |  |   bool scanBlockScalarHeader(char &ChompingIndicator, unsigned &IndentIndicator, | 
| 487 |  |                              bool &IsDone); | 
| 488 |  |  | 
| 489 |  |   /// Look for the indentation level of a block scalar. | 
| 490 |  |   /// | 
| 491 |  |   /// Return false if an error occurred. | 
| 492 |  |   bool findBlockScalarIndent(unsigned &BlockIndent, unsigned BlockExitIndent, | 
| 493 |  |                              unsigned &LineBreaks, bool &IsDone); | 
| 494 |  |  | 
| 495 |  |   /// Scan the indentation of a text line in a block scalar. | 
| 496 |  |   /// | 
| 497 |  |   /// Return false if an error occurred. | 
| 498 |  |   bool scanBlockScalarIndent(unsigned BlockIndent, unsigned BlockExitIndent, | 
| 499 |  |                              bool &IsDone); | 
| 500 |  |  | 
| 501 |  |   /// Scan a tag of the form !stuff. | 
| 502 |  |   bool scanTag(); | 
| 503 |  |  | 
| 504 |  |   /// Dispatch to the next scanning function based on \a *Current. | 
| 505 |  |   bool fetchMoreTokens(); | 
| 506 |  |  | 
| 507 |  |   /// The SourceMgr used for diagnostics and buffer management. | 
| 508 |  |   SourceMgr &SM; | 
| 509 |  |  | 
| 510 |  |   /// The original input. | 
| 511 |  |   MemoryBufferRef InputBuffer; | 
| 512 |  |  | 
| 513 |  |   /// The current position of the scanner. | 
| 514 |  |   StringRef::iterator Current; | 
| 515 |  |  | 
| 516 |  |   /// The end of the input (one past the last character). | 
| 517 |  |   StringRef::iterator End; | 
| 518 |  |  | 
| 519 |  |   /// Current YAML indentation level in spaces. | 
| 520 |  |   int Indent; | 
| 521 |  |  | 
| 522 |  |   /// Current column number in Unicode code points. | 
| 523 |  |   unsigned Column; | 
| 524 |  |  | 
| 525 |  |   /// Current line number. | 
| 526 |  |   unsigned Line; | 
| 527 |  |  | 
| 528 |  |   /// How deep we are in flow style containers. 0 Means at block level. | 
| 529 |  |   unsigned FlowLevel; | 
| 530 |  |  | 
| 531 |  |   /// Are we at the start of the stream? | 
| 532 |  |   bool IsStartOfStream; | 
| 533 |  |  | 
| 534 |  |   /// Can the next token be the start of a simple key? | 
| 535 |  |   bool IsSimpleKeyAllowed; | 
| 536 |  |  | 
| 537 |  |   /// True if an error has occurred. | 
| 538 |  |   bool Failed; | 
| 539 |  |  | 
| 540 |  |   /// Should colors be used when printing out the diagnostic messages? | 
| 541 |  |   bool ShowColors; | 
| 542 |  |  | 
| 543 |  |   /// Queue of tokens. This is required to queue up tokens while looking | 
| 544 |  |   ///        for the end of a simple key. And for cases where a single character | 
| 545 |  |   ///        can produce multiple tokens (e.g. BlockEnd). | 
| 546 |  |   TokenQueueT TokenQueue; | 
| 547 |  |  | 
| 548 |  |   /// Indentation levels. | 
| 549 |  |   SmallVector<int, 4> Indents; | 
| 550 |  |  | 
| 551 |  |   /// Potential simple keys. | 
| 552 |  |   SmallVector<SimpleKey, 4> SimpleKeys; | 
| 553 |  |  | 
| 554 |  |   std::error_code *EC; | 
| 555 |  | }; | 
| 556 |  |  | 
| 557 |  | } // end namespace yaml | 
| 558 |  | } // end namespace llvm | 
| 559 |  |  | 
| 560 |  | /// encodeUTF8 - Encode \a UnicodeScalarValue in UTF-8 and append it to result. | 
| 561 |  | static void encodeUTF8( uint32_t UnicodeScalarValue | 
| 562 | 0 |                       , SmallVectorImpl<char> &Result) { | 
| 563 | 0 |   if (UnicodeScalarValue <= 0x7F) { | 
| 564 | 0 |     Result.push_back(UnicodeScalarValue & 0x7F); | 
| 565 | 0 |   } else if (UnicodeScalarValue <= 0x7FF) { | 
| 566 | 0 |     uint8_t FirstByte = 0xC0 | ((UnicodeScalarValue & 0x7C0) >> 6); | 
| 567 | 0 |     uint8_t SecondByte = 0x80 | (UnicodeScalarValue & 0x3F); | 
| 568 | 0 |     Result.push_back(FirstByte); | 
| 569 | 0 |     Result.push_back(SecondByte); | 
| 570 | 0 |   } else if (UnicodeScalarValue <= 0xFFFF) { | 
| 571 | 0 |     uint8_t FirstByte = 0xE0 | ((UnicodeScalarValue & 0xF000) >> 12); | 
| 572 | 0 |     uint8_t SecondByte = 0x80 | ((UnicodeScalarValue & 0xFC0) >> 6); | 
| 573 | 0 |     uint8_t ThirdByte = 0x80 | (UnicodeScalarValue & 0x3F); | 
| 574 | 0 |     Result.push_back(FirstByte); | 
| 575 | 0 |     Result.push_back(SecondByte); | 
| 576 | 0 |     Result.push_back(ThirdByte); | 
| 577 | 0 |   } else if (UnicodeScalarValue <= 0x10FFFF) { | 
| 578 | 0 |     uint8_t FirstByte = 0xF0 | ((UnicodeScalarValue & 0x1F0000) >> 18); | 
| 579 | 0 |     uint8_t SecondByte = 0x80 | ((UnicodeScalarValue & 0x3F000) >> 12); | 
| 580 | 0 |     uint8_t ThirdByte = 0x80 | ((UnicodeScalarValue & 0xFC0) >> 6); | 
| 581 | 0 |     uint8_t FourthByte = 0x80 | (UnicodeScalarValue & 0x3F); | 
| 582 | 0 |     Result.push_back(FirstByte); | 
| 583 | 0 |     Result.push_back(SecondByte); | 
| 584 | 0 |     Result.push_back(ThirdByte); | 
| 585 | 0 |     Result.push_back(FourthByte); | 
| 586 | 0 |   } | 
| 587 | 0 | } | 
| 588 |  |  | 
| 589 | 0 | bool yaml::dumpTokens(StringRef Input, raw_ostream &OS) { | 
| 590 | 0 |   SourceMgr SM; | 
| 591 | 0 |   Scanner scanner(Input, SM); | 
| 592 | 0 |   while (true) { | 
| 593 | 0 |     Token T = scanner.getNext(); | 
| 594 | 0 |     switch (T.Kind) { | 
| 595 | 0 |     case Token::TK_StreamStart: | 
| 596 | 0 |       OS << "Stream-Start: "; | 
| 597 | 0 |       break; | 
| 598 | 0 |     case Token::TK_StreamEnd: | 
| 599 | 0 |       OS << "Stream-End: "; | 
| 600 | 0 |       break; | 
| 601 | 0 |     case Token::TK_VersionDirective: | 
| 602 | 0 |       OS << "Version-Directive: "; | 
| 603 | 0 |       break; | 
| 604 | 0 |     case Token::TK_TagDirective: | 
| 605 | 0 |       OS << "Tag-Directive: "; | 
| 606 | 0 |       break; | 
| 607 | 0 |     case Token::TK_DocumentStart: | 
| 608 | 0 |       OS << "Document-Start: "; | 
| 609 | 0 |       break; | 
| 610 | 0 |     case Token::TK_DocumentEnd: | 
| 611 | 0 |       OS << "Document-End: "; | 
| 612 | 0 |       break; | 
| 613 | 0 |     case Token::TK_BlockEntry: | 
| 614 | 0 |       OS << "Block-Entry: "; | 
| 615 | 0 |       break; | 
| 616 | 0 |     case Token::TK_BlockEnd: | 
| 617 | 0 |       OS << "Block-End: "; | 
| 618 | 0 |       break; | 
| 619 | 0 |     case Token::TK_BlockSequenceStart: | 
| 620 | 0 |       OS << "Block-Sequence-Start: "; | 
| 621 | 0 |       break; | 
| 622 | 0 |     case Token::TK_BlockMappingStart: | 
| 623 | 0 |       OS << "Block-Mapping-Start: "; | 
| 624 | 0 |       break; | 
| 625 | 0 |     case Token::TK_FlowEntry: | 
| 626 | 0 |       OS << "Flow-Entry: "; | 
| 627 | 0 |       break; | 
| 628 | 0 |     case Token::TK_FlowSequenceStart: | 
| 629 | 0 |       OS << "Flow-Sequence-Start: "; | 
| 630 | 0 |       break; | 
| 631 | 0 |     case Token::TK_FlowSequenceEnd: | 
| 632 | 0 |       OS << "Flow-Sequence-End: "; | 
| 633 | 0 |       break; | 
| 634 | 0 |     case Token::TK_FlowMappingStart: | 
| 635 | 0 |       OS << "Flow-Mapping-Start: "; | 
| 636 | 0 |       break; | 
| 637 | 0 |     case Token::TK_FlowMappingEnd: | 
| 638 | 0 |       OS << "Flow-Mapping-End: "; | 
| 639 | 0 |       break; | 
| 640 | 0 |     case Token::TK_Key: | 
| 641 | 0 |       OS << "Key: "; | 
| 642 | 0 |       break; | 
| 643 | 0 |     case Token::TK_Value: | 
| 644 | 0 |       OS << "Value: "; | 
| 645 | 0 |       break; | 
| 646 | 0 |     case Token::TK_Scalar: | 
| 647 | 0 |       OS << "Scalar: "; | 
| 648 | 0 |       break; | 
| 649 | 0 |     case Token::TK_BlockScalar: | 
| 650 | 0 |       OS << "Block Scalar: "; | 
| 651 | 0 |       break; | 
| 652 | 0 |     case Token::TK_Alias: | 
| 653 | 0 |       OS << "Alias: "; | 
| 654 | 0 |       break; | 
| 655 | 0 |     case Token::TK_Anchor: | 
| 656 | 0 |       OS << "Anchor: "; | 
| 657 | 0 |       break; | 
| 658 | 0 |     case Token::TK_Tag: | 
| 659 | 0 |       OS << "Tag: "; | 
| 660 | 0 |       break; | 
| 661 | 0 |     case Token::TK_Error: | 
| 662 | 0 |       break; | 
| 663 | 0 |     } | 
| 664 | 0 |     OS << T.Range << "\n"; | 
| 665 | 0 |     if (T.Kind == Token::TK_StreamEnd) | 
| 666 | 0 |       break; | 
| 667 | 0 |     else if (T.Kind == Token::TK_Error) | 
| 668 | 0 |       return false; | 
| 669 | 0 |   } | 
| 670 | 0 |   return true; | 
| 671 | 0 | } | 
| 672 |  |  | 
| 673 | 0 | bool yaml::scanTokens(StringRef Input) { | 
| 674 | 0 |   SourceMgr SM; | 
| 675 | 0 |   Scanner scanner(Input, SM); | 
| 676 | 0 |   while (true) { | 
| 677 | 0 |     Token T = scanner.getNext(); | 
| 678 | 0 |     if (T.Kind == Token::TK_StreamEnd) | 
| 679 | 0 |       break; | 
| 680 | 0 |     else if (T.Kind == Token::TK_Error) | 
| 681 | 0 |       return false; | 
| 682 | 0 |   } | 
| 683 | 0 |   return true; | 
| 684 | 0 | } | 
| 685 |  |  | 
| 686 | 0 | std::string yaml::escape(StringRef Input, bool EscapePrintable) { | 
| 687 | 0 |   std::string EscapedInput; | 
| 688 | 0 |   for (StringRef::iterator i = Input.begin(), e = Input.end(); i != e; ++i) { | 
| 689 | 0 |     if (*i == '\\') | 
| 690 | 0 |       EscapedInput += "\\\\"; | 
| 691 | 0 |     else if (*i == '"') | 
| 692 | 0 |       EscapedInput += "\\\""; | 
| 693 | 0 |     else if (*i == 0) | 
| 694 | 0 |       EscapedInput += "\\0"; | 
| 695 | 0 |     else if (*i == 0x07) | 
| 696 | 0 |       EscapedInput += "\\a"; | 
| 697 | 0 |     else if (*i == 0x08) | 
| 698 | 0 |       EscapedInput += "\\b"; | 
| 699 | 0 |     else if (*i == 0x09) | 
| 700 | 0 |       EscapedInput += "\\t"; | 
| 701 | 0 |     else if (*i == 0x0A) | 
| 702 | 0 |       EscapedInput += "\\n"; | 
| 703 | 0 |     else if (*i == 0x0B) | 
| 704 | 0 |       EscapedInput += "\\v"; | 
| 705 | 0 |     else if (*i == 0x0C) | 
| 706 | 0 |       EscapedInput += "\\f"; | 
| 707 | 0 |     else if (*i == 0x0D) | 
| 708 | 0 |       EscapedInput += "\\r"; | 
| 709 | 0 |     else if (*i == 0x1B) | 
| 710 | 0 |       EscapedInput += "\\e"; | 
| 711 | 0 |     else if ((unsigned char)*i < 0x20) { // Control characters not handled above. | 
| 712 | 0 |       std::string HexStr = utohexstr(*i); | 
| 713 | 0 |       EscapedInput += "\\x" + std::string(2 - HexStr.size(), '0') + HexStr; | 
| 714 | 0 |     } else if (*i & 0x80) { // UTF-8 multiple code unit subsequence. | 
| 715 | 0 |       UTF8Decoded UnicodeScalarValue | 
| 716 | 0 |         = decodeUTF8(StringRef(i, Input.end() - i)); | 
| 717 | 0 |       if (UnicodeScalarValue.second == 0) { | 
| 718 | 0 |         // Found invalid char. | 
| 719 | 0 |         SmallString<4> Val; | 
| 720 | 0 |         encodeUTF8(0xFFFD, Val); | 
| 721 | 0 |         EscapedInput.insert(EscapedInput.end(), Val.begin(), Val.end()); | 
| 722 | 0 |         // FIXME: Error reporting. | 
| 723 | 0 |         return EscapedInput; | 
| 724 | 0 |       } | 
| 725 | 0 |       if (UnicodeScalarValue.first == 0x85) | 
| 726 | 0 |         EscapedInput += "\\N"; | 
| 727 | 0 |       else if (UnicodeScalarValue.first == 0xA0) | 
| 728 | 0 |         EscapedInput += "\\_"; | 
| 729 | 0 |       else if (UnicodeScalarValue.first == 0x2028) | 
| 730 | 0 |         EscapedInput += "\\L"; | 
| 731 | 0 |       else if (UnicodeScalarValue.first == 0x2029) | 
| 732 | 0 |         EscapedInput += "\\P"; | 
| 733 | 0 |       else if (!EscapePrintable && | 
| 734 | 0 |                sys::unicode::isPrintable(UnicodeScalarValue.first)) | 
| 735 | 0 |         EscapedInput += StringRef(i, UnicodeScalarValue.second); | 
| 736 | 0 |       else { | 
| 737 | 0 |         std::string HexStr = utohexstr(UnicodeScalarValue.first); | 
| 738 | 0 |         if (HexStr.size() <= 2) | 
| 739 | 0 |           EscapedInput += "\\x" + std::string(2 - HexStr.size(), '0') + HexStr; | 
| 740 | 0 |         else if (HexStr.size() <= 4) | 
| 741 | 0 |           EscapedInput += "\\u" + std::string(4 - HexStr.size(), '0') + HexStr; | 
| 742 | 0 |         else if (HexStr.size() <= 8) | 
| 743 | 0 |           EscapedInput += "\\U" + std::string(8 - HexStr.size(), '0') + HexStr; | 
| 744 | 0 |       } | 
| 745 | 0 |       i += UnicodeScalarValue.second - 1; | 
| 746 | 0 |     } else | 
| 747 | 0 |       EscapedInput.push_back(*i); | 
| 748 | 0 |   } | 
| 749 | 0 |   return EscapedInput; | 
| 750 | 0 | } | 
| 751 |  |  | 
| 752 |  | Scanner::Scanner(StringRef Input, SourceMgr &sm, bool ShowColors, | 
| 753 |  |                  std::error_code *EC) | 
| 754 | 0 |     : SM(sm), ShowColors(ShowColors), EC(EC) { | 
| 755 | 0 |   init(MemoryBufferRef(Input, "YAML")); | 
| 756 | 0 | } | 
| 757 |  |  | 
| 758 |  | Scanner::Scanner(MemoryBufferRef Buffer, SourceMgr &SM_, bool ShowColors, | 
| 759 |  |                  std::error_code *EC) | 
| 760 | 0 |     : SM(SM_), ShowColors(ShowColors), EC(EC) { | 
| 761 | 0 |   init(Buffer); | 
| 762 | 0 | } | 
| 763 |  |  | 
| 764 | 0 | void Scanner::init(MemoryBufferRef Buffer) { | 
| 765 | 0 |   InputBuffer = Buffer; | 
| 766 | 0 |   Current = InputBuffer.getBufferStart(); | 
| 767 | 0 |   End = InputBuffer.getBufferEnd(); | 
| 768 | 0 |   Indent = -1; | 
| 769 | 0 |   Column = 0; | 
| 770 | 0 |   Line = 0; | 
| 771 | 0 |   FlowLevel = 0; | 
| 772 | 0 |   IsStartOfStream = true; | 
| 773 | 0 |   IsSimpleKeyAllowed = true; | 
| 774 | 0 |   Failed = false; | 
| 775 | 0 |   std::unique_ptr<MemoryBuffer> InputBufferOwner = | 
| 776 | 0 |       MemoryBuffer::getMemBuffer(Buffer); | 
| 777 | 0 |   SM.AddNewSourceBuffer(std::move(InputBufferOwner), SMLoc()); | 
| 778 | 0 | } | 
| 779 |  |  | 
| 780 | 0 | Token &Scanner::peekNext() { | 
| 781 | 0 |   // If the current token is a possible simple key, keep parsing until we | 
| 782 | 0 |   // can confirm. | 
| 783 | 0 |   bool NeedMore = false; | 
| 784 | 0 |   while (true) { | 
| 785 | 0 |     if (TokenQueue.empty() || NeedMore) { | 
| 786 | 0 |       if (!fetchMoreTokens()) { | 
| 787 | 0 |         TokenQueue.clear(); | 
| 788 | 0 |         SimpleKeys.clear(); | 
| 789 | 0 |         TokenQueue.push_back(Token()); | 
| 790 | 0 |         return TokenQueue.front(); | 
| 791 | 0 |       } | 
| 792 | 0 |     } | 
| 793 | 0 |     assert(!TokenQueue.empty() && | 
| 794 | 0 |             "fetchMoreTokens lied about getting tokens!"); | 
| 795 | 0 | 
 | 
| 796 | 0 |     removeStaleSimpleKeyCandidates(); | 
| 797 | 0 |     SimpleKey SK; | 
| 798 | 0 |     SK.Tok = TokenQueue.begin(); | 
| 799 | 0 |     if (!is_contained(SimpleKeys, SK)) | 
| 800 | 0 |       break; | 
| 801 | 0 |     else | 
| 802 | 0 |       NeedMore = true; | 
| 803 | 0 |   } | 
| 804 | 0 |   return TokenQueue.front(); | 
| 805 | 0 | } | 
| 806 |  |  | 
| 807 | 0 | Token Scanner::getNext() { | 
| 808 | 0 |   Token Ret = peekNext(); | 
| 809 | 0 |   // TokenQueue can be empty if there was an error getting the next token. | 
| 810 | 0 |   if (!TokenQueue.empty()) | 
| 811 | 0 |     TokenQueue.pop_front(); | 
| 812 | 0 | 
 | 
| 813 | 0 |   // There cannot be any referenced Token's if the TokenQueue is empty. So do a | 
| 814 | 0 |   // quick deallocation of them all. | 
| 815 | 0 |   if (TokenQueue.empty()) | 
| 816 | 0 |     TokenQueue.resetAlloc(); | 
| 817 | 0 | 
 | 
| 818 | 0 |   return Ret; | 
| 819 | 0 | } | 
| 820 |  |  | 
| 821 | 0 | StringRef::iterator Scanner::skip_nb_char(StringRef::iterator Position) { | 
| 822 | 0 |   if (Position == End) | 
| 823 | 0 |     return Position; | 
| 824 | 0 |   // Check 7 bit c-printable - b-char. | 
| 825 | 0 |   if (   *Position == 0x09 | 
| 826 | 0 |       || (*Position >= 0x20 && *Position <= 0x7E)) | 
| 827 | 0 |     return Position + 1; | 
| 828 | 0 |  | 
| 829 | 0 |   // Check for valid UTF-8. | 
| 830 | 0 |   if (uint8_t(*Position) & 0x80) { | 
| 831 | 0 |     UTF8Decoded u8d = decodeUTF8(Position); | 
| 832 | 0 |     if (   u8d.second != 0 | 
| 833 | 0 |         && u8d.first != 0xFEFF | 
| 834 | 0 |         && ( u8d.first == 0x85 | 
| 835 | 0 |           || ( u8d.first >= 0xA0 | 
| 836 | 0 |             && u8d.first <= 0xD7FF) | 
| 837 | 0 |           || ( u8d.first >= 0xE000 | 
| 838 | 0 |             && u8d.first <= 0xFFFD) | 
| 839 | 0 |           || ( u8d.first >= 0x10000 | 
| 840 | 0 |             && u8d.first <= 0x10FFFF))) | 
| 841 | 0 |       return Position + u8d.second; | 
| 842 | 0 |   } | 
| 843 | 0 |   return Position; | 
| 844 | 0 | } | 
| 845 |  |  | 
| 846 | 0 | StringRef::iterator Scanner::skip_b_break(StringRef::iterator Position) { | 
| 847 | 0 |   if (Position == End) | 
| 848 | 0 |     return Position; | 
| 849 | 0 |   if (*Position == 0x0D) { | 
| 850 | 0 |     if (Position + 1 != End && *(Position + 1) == 0x0A) | 
| 851 | 0 |       return Position + 2; | 
| 852 | 0 |     return Position + 1; | 
| 853 | 0 |   } | 
| 854 | 0 |  | 
| 855 | 0 |   if (*Position == 0x0A) | 
| 856 | 0 |     return Position + 1; | 
| 857 | 0 |   return Position; | 
| 858 | 0 | } | 
| 859 |  |  | 
| 860 | 0 | StringRef::iterator Scanner::skip_s_space(StringRef::iterator Position) { | 
| 861 | 0 |   if (Position == End) | 
| 862 | 0 |     return Position; | 
| 863 | 0 |   if (*Position == ' ') | 
| 864 | 0 |     return Position + 1; | 
| 865 | 0 |   return Position; | 
| 866 | 0 | } | 
| 867 |  |  | 
| 868 | 0 | StringRef::iterator Scanner::skip_s_white(StringRef::iterator Position) { | 
| 869 | 0 |   if (Position == End) | 
| 870 | 0 |     return Position; | 
| 871 | 0 |   if (*Position == ' ' || *Position == '\t') | 
| 872 | 0 |     return Position + 1; | 
| 873 | 0 |   return Position; | 
| 874 | 0 | } | 
| 875 |  |  | 
| 876 | 0 | StringRef::iterator Scanner::skip_ns_char(StringRef::iterator Position) { | 
| 877 | 0 |   if (Position == End) | 
| 878 | 0 |     return Position; | 
| 879 | 0 |   if (*Position == ' ' || *Position == '\t') | 
| 880 | 0 |     return Position; | 
| 881 | 0 |   return skip_nb_char(Position); | 
| 882 | 0 | } | 
| 883 |  |  | 
| 884 |  | StringRef::iterator Scanner::skip_while( SkipWhileFunc Func | 
| 885 | 0 |                                        , StringRef::iterator Position) { | 
| 886 | 0 |   while (true) { | 
| 887 | 0 |     StringRef::iterator i = (this->*Func)(Position); | 
| 888 | 0 |     if (i == Position) | 
| 889 | 0 |       break; | 
| 890 | 0 |     Position = i; | 
| 891 | 0 |   } | 
| 892 | 0 |   return Position; | 
| 893 | 0 | } | 
| 894 |  |  | 
| 895 | 0 | void Scanner::advanceWhile(SkipWhileFunc Func) { | 
| 896 | 0 |   auto Final = skip_while(Func, Current); | 
| 897 | 0 |   Column += Final - Current; | 
| 898 | 0 |   Current = Final; | 
| 899 | 0 | } | 
| 900 |  |  | 
/// Return true if \p C is a hexadecimal digit: 0-9, a-f or A-F.
///
/// The letter ranges stop at 'f'/'F'; letters beyond that are not valid in a
/// percent-escape.
static bool is_ns_hex_digit(const char C) {
  return    (C >= '0' && C <= '9')
         || (C >= 'a' && C <= 'f')
         || (C >= 'A' && C <= 'F');
}
| 906 |  |  | 
/// Return true if \p C is a word character: a decimal digit, an ASCII
/// letter, or '-'.
static bool is_ns_word_char(const char C) {
  return    C == '-'
         || (C >= '0' && C <= '9')
         || (C >= 'a' && C <= 'z')
         || (C >= 'A' && C <= 'Z');
}
| 912 |  |  | 
| 913 | 0 | void Scanner::scan_ns_uri_char() { | 
| 914 | 0 |   while (true) { | 
| 915 | 0 |     if (Current == End) | 
| 916 | 0 |       break; | 
| 917 | 0 |     if ((   *Current == '%' | 
| 918 | 0 |           && Current + 2 < End | 
| 919 | 0 |           && is_ns_hex_digit(*(Current + 1)) | 
| 920 | 0 |           && is_ns_hex_digit(*(Current + 2))) | 
| 921 | 0 |         || is_ns_word_char(*Current) | 
| 922 | 0 |         || StringRef(Current, 1).find_first_of("#;/?:@&=+$,_.!~*'()[]") | 
| 923 | 0 |           != StringRef::npos) { | 
| 924 | 0 |       ++Current; | 
| 925 | 0 |       ++Column; | 
| 926 | 0 |     } else | 
| 927 | 0 |       break; | 
| 928 | 0 |   } | 
| 929 | 0 | } | 
| 930 |  |  | 
| 931 | 0 | bool Scanner::consume(uint32_t Expected) { | 
| 932 | 0 |   if (Expected >= 0x80) { | 
| 933 | 0 |     setError("Cannot consume non-ascii characters", Current); | 
| 934 | 0 |     return false; | 
| 935 | 0 |   } | 
| 936 | 0 |   if (Current == End) | 
| 937 | 0 |     return false; | 
| 938 | 0 |   if (uint8_t(*Current) >= 0x80) { | 
| 939 | 0 |     setError("Cannot consume non-ascii characters", Current); | 
| 940 | 0 |     return false; | 
| 941 | 0 |   } | 
| 942 | 0 |   if (uint8_t(*Current) == Expected) { | 
| 943 | 0 |     ++Current; | 
| 944 | 0 |     ++Column; | 
| 945 | 0 |     return true; | 
| 946 | 0 |   } | 
| 947 | 0 |   return false; | 
| 948 | 0 | } | 
| 949 |  |  | 
| 950 | 0 | void Scanner::skip(uint32_t Distance) { | 
| 951 | 0 |   Current += Distance; | 
| 952 | 0 |   Column += Distance; | 
| 953 | 0 |   assert(Current <= End && "Skipped past the end"); | 
| 954 | 0 | } | 
| 955 |  |  | 
| 956 | 0 | bool Scanner::isBlankOrBreak(StringRef::iterator Position) { | 
| 957 | 0 |   if (Position == End) | 
| 958 | 0 |     return false; | 
| 959 | 0 |   return *Position == ' ' || *Position == '\t' || *Position == '\r' || | 
| 960 | 0 |          *Position == '\n'; | 
| 961 | 0 | } | 
| 962 |  |  | 
| 963 | 0 | bool Scanner::consumeLineBreakIfPresent() { | 
| 964 | 0 |   auto Next = skip_b_break(Current); | 
| 965 | 0 |   if (Next == Current) | 
| 966 | 0 |     return false; | 
| 967 | 0 |   Column = 0; | 
| 968 | 0 |   ++Line; | 
| 969 | 0 |   Current = Next; | 
| 970 | 0 |   return true; | 
| 971 | 0 | } | 
| 972 |  |  | 
| 973 |  | void Scanner::saveSimpleKeyCandidate( TokenQueueT::iterator Tok | 
| 974 |  |                                     , unsigned AtColumn | 
| 975 | 0 |                                     , bool IsRequired) { | 
| 976 | 0 |   if (IsSimpleKeyAllowed) { | 
| 977 | 0 |     SimpleKey SK; | 
| 978 | 0 |     SK.Tok = Tok; | 
| 979 | 0 |     SK.Line = Line; | 
| 980 | 0 |     SK.Column = AtColumn; | 
| 981 | 0 |     SK.IsRequired = IsRequired; | 
| 982 | 0 |     SK.FlowLevel = FlowLevel; | 
| 983 | 0 |     SimpleKeys.push_back(SK); | 
| 984 | 0 |   } | 
| 985 | 0 | } | 
| 986 |  |  | 
| 987 | 0 | void Scanner::removeStaleSimpleKeyCandidates() { | 
| 988 | 0 |   for (SmallVectorImpl<SimpleKey>::iterator i = SimpleKeys.begin(); | 
| 989 | 0 |                                             i != SimpleKeys.end();) { | 
| 990 | 0 |     if (i->Line != Line || i->Column + 1024 < Column) { | 
| 991 | 0 |       if (i->IsRequired) | 
| 992 | 0 |         setError( "Could not find expected : for simple key" | 
| 993 | 0 |                 , i->Tok->Range.begin()); | 
| 994 | 0 |       i = SimpleKeys.erase(i); | 
| 995 | 0 |     } else | 
| 996 | 0 |       ++i; | 
| 997 | 0 |   } | 
| 998 | 0 | } | 
| 999 |  |  | 
| 1000 | 0 | void Scanner::removeSimpleKeyCandidatesOnFlowLevel(unsigned Level) { | 
| 1001 | 0 |   if (!SimpleKeys.empty() && (SimpleKeys.end() - 1)->FlowLevel == Level) | 
| 1002 | 0 |     SimpleKeys.pop_back(); | 
| 1003 | 0 | } | 
| 1004 |  |  | 
| 1005 | 0 | bool Scanner::unrollIndent(int ToColumn) { | 
| 1006 | 0 |   Token T; | 
| 1007 | 0 |   // Indentation is ignored in flow. | 
| 1008 | 0 |   if (FlowLevel != 0) | 
| 1009 | 0 |     return true; | 
| 1010 | 0 |  | 
| 1011 | 0 |   while (Indent > ToColumn) { | 
| 1012 | 0 |     T.Kind = Token::TK_BlockEnd; | 
| 1013 | 0 |     T.Range = StringRef(Current, 1); | 
| 1014 | 0 |     TokenQueue.push_back(T); | 
| 1015 | 0 |     Indent = Indents.pop_back_val(); | 
| 1016 | 0 |   } | 
| 1017 | 0 | 
 | 
| 1018 | 0 |   return true; | 
| 1019 | 0 | } | 
| 1020 |  |  | 
| 1021 |  | bool Scanner::rollIndent( int ToColumn | 
| 1022 |  |                         , Token::TokenKind Kind | 
| 1023 | 0 |                         , TokenQueueT::iterator InsertPoint) { | 
| 1024 | 0 |   if (FlowLevel) | 
| 1025 | 0 |     return true; | 
| 1026 | 0 |   if (Indent < ToColumn) { | 
| 1027 | 0 |     Indents.push_back(Indent); | 
| 1028 | 0 |     Indent = ToColumn; | 
| 1029 | 0 | 
 | 
| 1030 | 0 |     Token T; | 
| 1031 | 0 |     T.Kind = Kind; | 
| 1032 | 0 |     T.Range = StringRef(Current, 0); | 
| 1033 | 0 |     TokenQueue.insert(InsertPoint, T); | 
| 1034 | 0 |   } | 
| 1035 | 0 |   return true; | 
| 1036 | 0 | } | 
| 1037 |  |  | 
| 1038 | 0 | void Scanner::skipComment() { | 
| 1039 | 0 |   if (*Current != '#') | 
| 1040 | 0 |     return; | 
| 1041 | 0 |   while (true) { | 
| 1042 | 0 |     // This may skip more than one byte, thus Column is only incremented | 
| 1043 | 0 |     // for code points. | 
| 1044 | 0 |     StringRef::iterator I = skip_nb_char(Current); | 
| 1045 | 0 |     if (I == Current) | 
| 1046 | 0 |       break; | 
| 1047 | 0 |     Current = I; | 
| 1048 | 0 |     ++Column; | 
| 1049 | 0 |   } | 
| 1050 | 0 | } | 
| 1051 |  |  | 
| 1052 | 0 | void Scanner::scanToNextToken() { | 
| 1053 | 0 |   while (true) { | 
| 1054 | 0 |     while (*Current == ' ' || *Current == '\t') { | 
| 1055 | 0 |       skip(1); | 
| 1056 | 0 |     } | 
| 1057 | 0 | 
 | 
| 1058 | 0 |     skipComment(); | 
| 1059 | 0 | 
 | 
| 1060 | 0 |     // Skip EOL. | 
| 1061 | 0 |     StringRef::iterator i = skip_b_break(Current); | 
| 1062 | 0 |     if (i == Current) | 
| 1063 | 0 |       break; | 
| 1064 | 0 |     Current = i; | 
| 1065 | 0 |     ++Line; | 
| 1066 | 0 |     Column = 0; | 
| 1067 | 0 |     // New lines may start a simple key. | 
| 1068 | 0 |     if (!FlowLevel) | 
| 1069 | 0 |       IsSimpleKeyAllowed = true; | 
| 1070 | 0 |   } | 
| 1071 | 0 | } | 
| 1072 |  |  | 
| 1073 | 0 | bool Scanner::scanStreamStart() { | 
| 1074 | 0 |   IsStartOfStream = false; | 
| 1075 | 0 | 
 | 
| 1076 | 0 |   EncodingInfo EI = getUnicodeEncoding(currentInput()); | 
| 1077 | 0 | 
 | 
| 1078 | 0 |   Token T; | 
| 1079 | 0 |   T.Kind = Token::TK_StreamStart; | 
| 1080 | 0 |   T.Range = StringRef(Current, EI.second); | 
| 1081 | 0 |   TokenQueue.push_back(T); | 
| 1082 | 0 |   Current += EI.second; | 
| 1083 | 0 |   return true; | 
| 1084 | 0 | } | 
| 1085 |  |  | 
| 1086 | 0 | bool Scanner::scanStreamEnd() { | 
| 1087 | 0 |   // Force an ending new line if one isn't present. | 
| 1088 | 0 |   if (Column != 0) { | 
| 1089 | 0 |     Column = 0; | 
| 1090 | 0 |     ++Line; | 
| 1091 | 0 |   } | 
| 1092 | 0 | 
 | 
| 1093 | 0 |   unrollIndent(-1); | 
| 1094 | 0 |   SimpleKeys.clear(); | 
| 1095 | 0 |   IsSimpleKeyAllowed = false; | 
| 1096 | 0 | 
 | 
| 1097 | 0 |   Token T; | 
| 1098 | 0 |   T.Kind = Token::TK_StreamEnd; | 
| 1099 | 0 |   T.Range = StringRef(Current, 0); | 
| 1100 | 0 |   TokenQueue.push_back(T); | 
| 1101 | 0 |   return true; | 
| 1102 | 0 | } | 
| 1103 |  |  | 
| 1104 | 0 | bool Scanner::scanDirective() { | 
| 1105 | 0 |   // Reset the indentation level. | 
| 1106 | 0 |   unrollIndent(-1); | 
| 1107 | 0 |   SimpleKeys.clear(); | 
| 1108 | 0 |   IsSimpleKeyAllowed = false; | 
| 1109 | 0 | 
 | 
| 1110 | 0 |   StringRef::iterator Start = Current; | 
| 1111 | 0 |   consume('%'); | 
| 1112 | 0 |   StringRef::iterator NameStart = Current; | 
| 1113 | 0 |   Current = skip_while(&Scanner::skip_ns_char, Current); | 
| 1114 | 0 |   StringRef Name(NameStart, Current - NameStart); | 
| 1115 | 0 |   Current = skip_while(&Scanner::skip_s_white, Current); | 
| 1116 | 0 | 
 | 
| 1117 | 0 |   Token T; | 
| 1118 | 0 |   if (Name == "YAML") { | 
| 1119 | 0 |     Current = skip_while(&Scanner::skip_ns_char, Current); | 
| 1120 | 0 |     T.Kind = Token::TK_VersionDirective; | 
| 1121 | 0 |     T.Range = StringRef(Start, Current - Start); | 
| 1122 | 0 |     TokenQueue.push_back(T); | 
| 1123 | 0 |     return true; | 
| 1124 | 0 |   } else if(Name == "TAG") { | 
| 1125 | 0 |     Current = skip_while(&Scanner::skip_ns_char, Current); | 
| 1126 | 0 |     Current = skip_while(&Scanner::skip_s_white, Current); | 
| 1127 | 0 |     Current = skip_while(&Scanner::skip_ns_char, Current); | 
| 1128 | 0 |     T.Kind = Token::TK_TagDirective; | 
| 1129 | 0 |     T.Range = StringRef(Start, Current - Start); | 
| 1130 | 0 |     TokenQueue.push_back(T); | 
| 1131 | 0 |     return true; | 
| 1132 | 0 |   } | 
| 1133 | 0 |   return false; | 
| 1134 | 0 | } | 
| 1135 |  |  | 
| 1136 | 0 | bool Scanner::scanDocumentIndicator(bool IsStart) { | 
| 1137 | 0 |   unrollIndent(-1); | 
| 1138 | 0 |   SimpleKeys.clear(); | 
| 1139 | 0 |   IsSimpleKeyAllowed = false; | 
| 1140 | 0 | 
 | 
| 1141 | 0 |   Token T; | 
| 1142 | 0 |   T.Kind = IsStart ? Token::TK_DocumentStart : Token::TK_DocumentEnd; | 
| 1143 | 0 |   T.Range = StringRef(Current, 3); | 
| 1144 | 0 |   skip(3); | 
| 1145 | 0 |   TokenQueue.push_back(T); | 
| 1146 | 0 |   return true; | 
| 1147 | 0 | } | 
| 1148 |  |  | 
| 1149 | 0 | bool Scanner::scanFlowCollectionStart(bool IsSequence) { | 
| 1150 | 0 |   Token T; | 
| 1151 | 0 |   T.Kind = IsSequence ? Token::TK_FlowSequenceStart | 
| 1152 | 0 |                       : Token::TK_FlowMappingStart; | 
| 1153 | 0 |   T.Range = StringRef(Current, 1); | 
| 1154 | 0 |   skip(1); | 
| 1155 | 0 |   TokenQueue.push_back(T); | 
| 1156 | 0 | 
 | 
| 1157 | 0 |   // [ and { may begin a simple key. | 
| 1158 | 0 |   saveSimpleKeyCandidate(--TokenQueue.end(), Column - 1, false); | 
| 1159 | 0 | 
 | 
| 1160 | 0 |   // And may also be followed by a simple key. | 
| 1161 | 0 |   IsSimpleKeyAllowed = true; | 
| 1162 | 0 |   ++FlowLevel; | 
| 1163 | 0 |   return true; | 
| 1164 | 0 | } | 
| 1165 |  |  | 
| 1166 | 0 | bool Scanner::scanFlowCollectionEnd(bool IsSequence) { | 
| 1167 | 0 |   removeSimpleKeyCandidatesOnFlowLevel(FlowLevel); | 
| 1168 | 0 |   IsSimpleKeyAllowed = false; | 
| 1169 | 0 |   Token T; | 
| 1170 | 0 |   T.Kind = IsSequence ? Token::TK_FlowSequenceEnd | 
| 1171 | 0 |                       : Token::TK_FlowMappingEnd; | 
| 1172 | 0 |   T.Range = StringRef(Current, 1); | 
| 1173 | 0 |   skip(1); | 
| 1174 | 0 |   TokenQueue.push_back(T); | 
| 1175 | 0 |   if (FlowLevel) | 
| 1176 | 0 |     --FlowLevel; | 
| 1177 | 0 |   return true; | 
| 1178 | 0 | } | 
| 1179 |  |  | 
| 1180 | 0 | bool Scanner::scanFlowEntry() { | 
| 1181 | 0 |   removeSimpleKeyCandidatesOnFlowLevel(FlowLevel); | 
| 1182 | 0 |   IsSimpleKeyAllowed = true; | 
| 1183 | 0 |   Token T; | 
| 1184 | 0 |   T.Kind = Token::TK_FlowEntry; | 
| 1185 | 0 |   T.Range = StringRef(Current, 1); | 
| 1186 | 0 |   skip(1); | 
| 1187 | 0 |   TokenQueue.push_back(T); | 
| 1188 | 0 |   return true; | 
| 1189 | 0 | } | 
| 1190 |  |  | 
| 1191 | 0 | bool Scanner::scanBlockEntry() { | 
| 1192 | 0 |   rollIndent(Column, Token::TK_BlockSequenceStart, TokenQueue.end()); | 
| 1193 | 0 |   removeSimpleKeyCandidatesOnFlowLevel(FlowLevel); | 
| 1194 | 0 |   IsSimpleKeyAllowed = true; | 
| 1195 | 0 |   Token T; | 
| 1196 | 0 |   T.Kind = Token::TK_BlockEntry; | 
| 1197 | 0 |   T.Range = StringRef(Current, 1); | 
| 1198 | 0 |   skip(1); | 
| 1199 | 0 |   TokenQueue.push_back(T); | 
| 1200 | 0 |   return true; | 
| 1201 | 0 | } | 
| 1202 |  |  | 
| 1203 | 0 | bool Scanner::scanKey() { | 
| 1204 | 0 |   if (!FlowLevel) | 
| 1205 | 0 |     rollIndent(Column, Token::TK_BlockMappingStart, TokenQueue.end()); | 
| 1206 | 0 | 
 | 
| 1207 | 0 |   removeSimpleKeyCandidatesOnFlowLevel(FlowLevel); | 
| 1208 | 0 |   IsSimpleKeyAllowed = !FlowLevel; | 
| 1209 | 0 | 
 | 
| 1210 | 0 |   Token T; | 
| 1211 | 0 |   T.Kind = Token::TK_Key; | 
| 1212 | 0 |   T.Range = StringRef(Current, 1); | 
| 1213 | 0 |   skip(1); | 
| 1214 | 0 |   TokenQueue.push_back(T); | 
| 1215 | 0 |   return true; | 
| 1216 | 0 | } | 
| 1217 |  |  | 
| 1218 | 0 | bool Scanner::scanValue() { | 
| 1219 | 0 |   // If the previous token could have been a simple key, insert the key token | 
| 1220 | 0 |   // into the token queue. | 
| 1221 | 0 |   if (!SimpleKeys.empty()) { | 
| 1222 | 0 |     SimpleKey SK = SimpleKeys.pop_back_val(); | 
| 1223 | 0 |     Token T; | 
| 1224 | 0 |     T.Kind = Token::TK_Key; | 
| 1225 | 0 |     T.Range = SK.Tok->Range; | 
| 1226 | 0 |     TokenQueueT::iterator i, e; | 
| 1227 | 0 |     for (i = TokenQueue.begin(), e = TokenQueue.end(); i != e; ++i) { | 
| 1228 | 0 |       if (i == SK.Tok) | 
| 1229 | 0 |         break; | 
| 1230 | 0 |     } | 
| 1231 | 0 |     if (i == e) { | 
| 1232 | 0 |       Failed = true; | 
| 1233 | 0 |       return false; | 
| 1234 | 0 |     } | 
| 1235 | 0 |     i = TokenQueue.insert(i, T); | 
| 1236 | 0 | 
 | 
| 1237 | 0 |     // We may also need to add a Block-Mapping-Start token. | 
| 1238 | 0 |     rollIndent(SK.Column, Token::TK_BlockMappingStart, i); | 
| 1239 | 0 | 
 | 
| 1240 | 0 |     IsSimpleKeyAllowed = false; | 
| 1241 | 0 |   } else { | 
| 1242 | 0 |     if (!FlowLevel) | 
| 1243 | 0 |       rollIndent(Column, Token::TK_BlockMappingStart, TokenQueue.end()); | 
| 1244 | 0 |     IsSimpleKeyAllowed = !FlowLevel; | 
| 1245 | 0 |   } | 
| 1246 | 0 | 
 | 
| 1247 | 0 |   Token T; | 
| 1248 | 0 |   T.Kind = Token::TK_Value; | 
| 1249 | 0 |   T.Range = StringRef(Current, 1); | 
| 1250 | 0 |   skip(1); | 
| 1251 | 0 |   TokenQueue.push_back(T); | 
| 1252 | 0 |   return true; | 
| 1253 | 0 | } | 
| 1254 |  |  | 
| 1255 |  | // Forbidding inlining improves performance by roughly 20%. | 
| 1256 |  | // FIXME: Remove once llvm optimizes this to the faster version without hints. | 
| 1257 |  | LLVM_ATTRIBUTE_NOINLINE static bool | 
| 1258 |  | wasEscaped(StringRef::iterator First, StringRef::iterator Position); | 
| 1259 |  |  | 
| 1260 |  | // Returns whether a character at 'Position' was escaped with a leading '\'. | 
| 1261 |  | // 'First' specifies the position of the first character in the string. | 
| 1262 |  | static bool wasEscaped(StringRef::iterator First, | 
| 1263 | 0 |                        StringRef::iterator Position) { | 
| 1264 | 0 |   assert(Position - 1 >= First); | 
| 1265 | 0 |   StringRef::iterator I = Position - 1; | 
| 1266 | 0 |   // We calculate the number of consecutive '\'s before the current position | 
| 1267 | 0 |   // by iterating backwards through our string. | 
| 1268 | 0 |   while (I >= First && *I == '\\') --I; | 
| 1269 | 0 |   // (Position - 1 - I) now contains the number of '\'s before the current | 
| 1270 | 0 |   // position. If it is odd, the character at 'Position' was escaped. | 
| 1271 | 0 |   return (Position - 1 - I) % 2 == 1; | 
| 1272 | 0 | } | 
| 1273 |  |  | 
| 1274 | 0 | bool Scanner::scanFlowScalar(bool IsDoubleQuoted) { | 
| 1275 | 0 |   StringRef::iterator Start = Current; | 
| 1276 | 0 |   unsigned ColStart = Column; | 
| 1277 | 0 |   if (IsDoubleQuoted) { | 
| 1278 | 0 |     do { | 
| 1279 | 0 |       ++Current; | 
| 1280 | 0 |       while (Current != End && *Current != '"') | 
| 1281 | 0 |         ++Current; | 
| 1282 | 0 |       // Repeat until the previous character was not a '\' or was an escaped | 
| 1283 | 0 |       // backslash. | 
| 1284 | 0 |     } while (   Current != End | 
| 1285 | 0 |              && *(Current - 1) == '\\' | 
| 1286 | 0 |              && wasEscaped(Start + 1, Current)); | 
| 1287 | 0 |   } else { | 
| 1288 | 0 |     skip(1); | 
| 1289 | 0 |     while (true) { | 
| 1290 | 0 |       // Skip a ' followed by another '. | 
| 1291 | 0 |       if (Current + 1 < End && *Current == '\'' && *(Current + 1) == '\'') { | 
| 1292 | 0 |         skip(2); | 
| 1293 | 0 |         continue; | 
| 1294 | 0 |       } else if (*Current == '\'') | 
| 1295 | 0 |         break; | 
| 1296 | 0 |       StringRef::iterator i = skip_nb_char(Current); | 
| 1297 | 0 |       if (i == Current) { | 
| 1298 | 0 |         i = skip_b_break(Current); | 
| 1299 | 0 |         if (i == Current) | 
| 1300 | 0 |           break; | 
| 1301 | 0 |         Current = i; | 
| 1302 | 0 |         Column = 0; | 
| 1303 | 0 |         ++Line; | 
| 1304 | 0 |       } else { | 
| 1305 | 0 |         if (i == End) | 
| 1306 | 0 |           break; | 
| 1307 | 0 |         Current = i; | 
| 1308 | 0 |         ++Column; | 
| 1309 | 0 |       } | 
| 1310 | 0 |     } | 
| 1311 | 0 |   } | 
| 1312 | 0 | 
 | 
| 1313 | 0 |   if (Current == End) { | 
| 1314 | 0 |     setError("Expected quote at end of scalar", Current); | 
| 1315 | 0 |     return false; | 
| 1316 | 0 |   } | 
| 1317 | 0 |  | 
| 1318 | 0 |   skip(1); // Skip ending quote. | 
| 1319 | 0 |   Token T; | 
| 1320 | 0 |   T.Kind = Token::TK_Scalar; | 
| 1321 | 0 |   T.Range = StringRef(Start, Current - Start); | 
| 1322 | 0 |   TokenQueue.push_back(T); | 
| 1323 | 0 | 
 | 
| 1324 | 0 |   saveSimpleKeyCandidate(--TokenQueue.end(), ColStart, false); | 
| 1325 | 0 | 
 | 
| 1326 | 0 |   IsSimpleKeyAllowed = false; | 
| 1327 | 0 | 
 | 
| 1328 | 0 |   return true; | 
| 1329 | 0 | } | 
| 1330 |  |  | 
| 1331 | 0 | bool Scanner::scanPlainScalar() { | 
| 1332 | 0 |   StringRef::iterator Start = Current; | 
| 1333 | 0 |   unsigned ColStart = Column; | 
| 1334 | 0 |   unsigned LeadingBlanks = 0; | 
| 1335 | 0 |   assert(Indent >= -1 && "Indent must be >= -1 !"); | 
| 1336 | 0 |   unsigned indent = static_cast<unsigned>(Indent + 1); | 
| 1337 | 0 |   while (true) { | 
| 1338 | 0 |     if (*Current == '#') | 
| 1339 | 0 |       break; | 
| 1340 | 0 |  | 
| 1341 | 0 |     while (!isBlankOrBreak(Current)) { | 
| 1342 | 0 |       if (  FlowLevel && *Current == ':' | 
| 1343 | 0 |           && !(isBlankOrBreak(Current + 1) || *(Current + 1) == ',')) { | 
| 1344 | 0 |         setError("Found unexpected ':' while scanning a plain scalar", Current); | 
| 1345 | 0 |         return false; | 
| 1346 | 0 |       } | 
| 1347 | 0 |  | 
| 1348 | 0 |       // Check for the end of the plain scalar. | 
| 1349 | 0 |       if (  (*Current == ':' && isBlankOrBreak(Current + 1)) | 
| 1350 | 0 |           || (  FlowLevel | 
| 1351 | 0 |           && (StringRef(Current, 1).find_first_of(",:?[]{}") | 
| 1352 | 0 |               != StringRef::npos))) | 
| 1353 | 0 |         break; | 
| 1354 | 0 |  | 
| 1355 | 0 |       StringRef::iterator i = skip_nb_char(Current); | 
| 1356 | 0 |       if (i == Current) | 
| 1357 | 0 |         break; | 
| 1358 | 0 |       Current = i; | 
| 1359 | 0 |       ++Column; | 
| 1360 | 0 |     } | 
| 1361 | 0 | 
 | 
| 1362 | 0 |     // Are we at the end? | 
| 1363 | 0 |     if (!isBlankOrBreak(Current)) | 
| 1364 | 0 |       break; | 
| 1365 | 0 |  | 
| 1366 | 0 |     // Eat blanks. | 
| 1367 | 0 |     StringRef::iterator Tmp = Current; | 
| 1368 | 0 |     while (isBlankOrBreak(Tmp)) { | 
| 1369 | 0 |       StringRef::iterator i = skip_s_white(Tmp); | 
| 1370 | 0 |       if (i != Tmp) { | 
| 1371 | 0 |         if (LeadingBlanks && (Column < indent) && *Tmp == '\t') { | 
| 1372 | 0 |           setError("Found invalid tab character in indentation", Tmp); | 
| 1373 | 0 |           return false; | 
| 1374 | 0 |         } | 
| 1375 | 0 |         Tmp = i; | 
| 1376 | 0 |         ++Column; | 
| 1377 | 0 |       } else { | 
| 1378 | 0 |         i = skip_b_break(Tmp); | 
| 1379 | 0 |         if (!LeadingBlanks) | 
| 1380 | 0 |           LeadingBlanks = 1; | 
| 1381 | 0 |         Tmp = i; | 
| 1382 | 0 |         Column = 0; | 
| 1383 | 0 |         ++Line; | 
| 1384 | 0 |       } | 
| 1385 | 0 |     } | 
| 1386 | 0 | 
 | 
| 1387 | 0 |     if (!FlowLevel && Column < indent) | 
| 1388 | 0 |       break; | 
| 1389 | 0 |  | 
| 1390 | 0 |     Current = Tmp; | 
| 1391 | 0 |   } | 
| 1392 | 0 |   if (Start == Current) { | 
| 1393 | 0 |     setError("Got empty plain scalar", Start); | 
| 1394 | 0 |     return false; | 
| 1395 | 0 |   } | 
| 1396 | 0 |   Token T; | 
| 1397 | 0 |   T.Kind = Token::TK_Scalar; | 
| 1398 | 0 |   T.Range = StringRef(Start, Current - Start); | 
| 1399 | 0 |   TokenQueue.push_back(T); | 
| 1400 | 0 | 
 | 
| 1401 | 0 |   // Plain scalars can be simple keys. | 
| 1402 | 0 |   saveSimpleKeyCandidate(--TokenQueue.end(), ColStart, false); | 
| 1403 | 0 | 
 | 
| 1404 | 0 |   IsSimpleKeyAllowed = false; | 
| 1405 | 0 | 
 | 
| 1406 | 0 |   return true; | 
| 1407 | 0 | } | 
| 1408 |  |  | 
| 1409 | 0 | bool Scanner::scanAliasOrAnchor(bool IsAlias) { | 
| 1410 | 0 |   StringRef::iterator Start = Current; | 
| 1411 | 0 |   unsigned ColStart = Column; | 
| 1412 | 0 |   skip(1); | 
| 1413 | 0 |   while(true) { | 
| 1414 | 0 |     if (   *Current == '[' || *Current == ']' | 
| 1415 | 0 |         || *Current == '{' || *Current == '}' | 
| 1416 | 0 |         || *Current == ',' | 
| 1417 | 0 |         || *Current == ':') | 
| 1418 | 0 |       break; | 
| 1419 | 0 |     StringRef::iterator i = skip_ns_char(Current); | 
| 1420 | 0 |     if (i == Current) | 
| 1421 | 0 |       break; | 
| 1422 | 0 |     Current = i; | 
| 1423 | 0 |     ++Column; | 
| 1424 | 0 |   } | 
| 1425 | 0 | 
 | 
| 1426 | 0 |   if (Start == Current) { | 
| 1427 | 0 |     setError("Got empty alias or anchor", Start); | 
| 1428 | 0 |     return false; | 
| 1429 | 0 |   } | 
| 1430 | 0 |  | 
| 1431 | 0 |   Token T; | 
| 1432 | 0 |   T.Kind = IsAlias ? Token::TK_Alias : Token::TK_Anchor; | 
| 1433 | 0 |   T.Range = StringRef(Start, Current - Start); | 
| 1434 | 0 |   TokenQueue.push_back(T); | 
| 1435 | 0 | 
 | 
| 1436 | 0 |   // Alias and anchors can be simple keys. | 
| 1437 | 0 |   saveSimpleKeyCandidate(--TokenQueue.end(), ColStart, false); | 
| 1438 | 0 | 
 | 
| 1439 | 0 |   IsSimpleKeyAllowed = false; | 
| 1440 | 0 | 
 | 
| 1441 | 0 |   return true; | 
| 1442 | 0 | } | 
| 1443 |  |  | 
| 1444 | 0 | char Scanner::scanBlockChompingIndicator() { | 
| 1445 | 0 |   char Indicator = ' '; | 
| 1446 | 0 |   if (Current != End && (*Current == '+' || *Current == '-')) { | 
| 1447 | 0 |     Indicator = *Current; | 
| 1448 | 0 |     skip(1); | 
| 1449 | 0 |   } | 
| 1450 | 0 |   return Indicator; | 
| 1451 | 0 | } | 
| 1452 |  |  | 
| 1453 |  | /// Get the number of line breaks after chomping. | 
| 1454 |  | /// | 
| 1455 |  | /// Return the number of trailing line breaks to emit, depending on | 
| 1456 |  | /// \p ChompingIndicator. | 
| 1457 |  | static unsigned getChompedLineBreaks(char ChompingIndicator, | 
| 1458 | 0 |                                      unsigned LineBreaks, StringRef Str) { | 
| 1459 | 0 |   if (ChompingIndicator == '-') // Strip all line breaks. | 
| 1460 | 0 |     return 0; | 
| 1461 | 0 |   if (ChompingIndicator == '+') // Keep all line breaks. | 
| 1462 | 0 |     return LineBreaks; | 
| 1463 | 0 |   // Clip trailing lines. | 
| 1464 | 0 |   return Str.empty() ? 0 : 1; | 
| 1465 | 0 | } | 
| 1466 |  |  | 
| 1467 | 0 | unsigned Scanner::scanBlockIndentationIndicator() { | 
| 1468 | 0 |   unsigned Indent = 0; | 
| 1469 | 0 |   if (Current != End && (*Current >= '1' && *Current <= '9')) { | 
| 1470 | 0 |     Indent = unsigned(*Current - '0'); | 
| 1471 | 0 |     skip(1); | 
| 1472 | 0 |   } | 
| 1473 | 0 |   return Indent; | 
| 1474 | 0 | } | 
| 1475 |  |  | 
| 1476 |  | bool Scanner::scanBlockScalarHeader(char &ChompingIndicator, | 
| 1477 | 0 |                                     unsigned &IndentIndicator, bool &IsDone) { | 
| 1478 | 0 |   auto Start = Current; | 
| 1479 | 0 | 
 | 
| 1480 | 0 |   ChompingIndicator = scanBlockChompingIndicator(); | 
| 1481 | 0 |   IndentIndicator = scanBlockIndentationIndicator(); | 
| 1482 | 0 |   // Check for the chomping indicator once again. | 
| 1483 | 0 |   if (ChompingIndicator == ' ') | 
| 1484 | 0 |     ChompingIndicator = scanBlockChompingIndicator(); | 
| 1485 | 0 |   Current = skip_while(&Scanner::skip_s_white, Current); | 
| 1486 | 0 |   skipComment(); | 
| 1487 | 0 | 
 | 
| 1488 | 0 |   if (Current == End) { // EOF, we have an empty scalar. | 
| 1489 | 0 |     Token T; | 
| 1490 | 0 |     T.Kind = Token::TK_BlockScalar; | 
| 1491 | 0 |     T.Range = StringRef(Start, Current - Start); | 
| 1492 | 0 |     TokenQueue.push_back(T); | 
| 1493 | 0 |     IsDone = true; | 
| 1494 | 0 |     return true; | 
| 1495 | 0 |   } | 
| 1496 | 0 |  | 
| 1497 | 0 |   if (!consumeLineBreakIfPresent()) { | 
| 1498 | 0 |     setError("Expected a line break after block scalar header", Current); | 
| 1499 | 0 |     return false; | 
| 1500 | 0 |   } | 
| 1501 | 0 |   return true; | 
| 1502 | 0 | } | 
| 1503 |  |  | 
/// Discover the indentation of a block scalar by scanning forward to its
/// first non-empty line.
///
/// \param BlockIndent set to the column of the first non-empty line, provided
///        that line is indented past \p BlockExitIndent.
/// \param BlockExitIndent a non-empty line at or below this column ends the
///        scalar.
/// \param LineBreaks incremented once for every line break consumed here.
/// \param IsDone set to true when the scalar ends (dedented line, end of
///        input, or no line break to consume) before any content is found.
/// \returns false (after calling setError) when an earlier all-space line is
/// longer than the discovered indentation; true otherwise.
bool Scanner::findBlockScalarIndent(unsigned &BlockIndent,
                                    unsigned BlockExitIndent,
                                    unsigned &LineBreaks, bool &IsDone) {
  unsigned MaxAllSpaceLineCharacters = 0;
  // Only read when MaxAllSpaceLineCharacters is non-zero, i.e. after it has
  // been assigned below.
  StringRef::iterator LongestAllSpaceLine;

  while (true) {
    // Skip the leading spaces of the current line.
    advanceWhile(&Scanner::skip_s_space);
    if (skip_nb_char(Current) != Current) {
      // This line isn't empty, so try and find the indentation.
      if (Column <= BlockExitIndent) { // End of the block literal.
        IsDone = true;
        return true;
      }
      // We found the block's indentation.
      BlockIndent = Column;
      if (MaxAllSpaceLineCharacters > BlockIndent) {
        setError(
            "Leading all-spaces line must be smaller than the block indent",
            LongestAllSpaceLine);
        return false;
      }
      return true;
    }
    if (skip_b_break(Current) != Current &&
        Column > MaxAllSpaceLineCharacters) {
      // Record the longest all-space line in case it's longer than the
      // discovered block indent.
      MaxAllSpaceLineCharacters = Column;
      LongestAllSpaceLine = Current;
    }

    // Check for EOF.
    if (Current == End) {
      IsDone = true;
      return true;
    }

    // Neither a non-break character nor a line break: stop scanning here.
    if (!consumeLineBreakIfPresent()) {
      IsDone = true;
      return true;
    }
    ++LineBreaks;
  }
  // Not reached: every path out of the loop above returns.
  return true;
}
| 1550 |  |  | 
| 1551 |  | bool Scanner::scanBlockScalarIndent(unsigned BlockIndent, | 
| 1552 | 0 |                                     unsigned BlockExitIndent, bool &IsDone) { | 
| 1553 | 0 |   // Skip the indentation. | 
| 1554 | 0 |   while (Column < BlockIndent) { | 
| 1555 | 0 |     auto I = skip_s_space(Current); | 
| 1556 | 0 |     if (I == Current) | 
| 1557 | 0 |       break; | 
| 1558 | 0 |     Current = I; | 
| 1559 | 0 |     ++Column; | 
| 1560 | 0 |   } | 
| 1561 | 0 | 
 | 
| 1562 | 0 |   if (skip_nb_char(Current) == Current) | 
| 1563 | 0 |     return true; | 
| 1564 | 0 |  | 
| 1565 | 0 |   if (Column <= BlockExitIndent) { // End of the block literal. | 
| 1566 | 0 |     IsDone = true; | 
| 1567 | 0 |     return true; | 
| 1568 | 0 |   } | 
| 1569 | 0 |  | 
| 1570 | 0 |   if (Column < BlockIndent) { | 
| 1571 | 0 |     if (Current != End && *Current == '#') { // Trailing comment. | 
| 1572 | 0 |       IsDone = true; | 
| 1573 | 0 |       return true; | 
| 1574 | 0 |     } | 
| 1575 | 0 |     setError("A text line is less indented than the block scalar", Current); | 
| 1576 | 0 |     return false; | 
| 1577 | 0 |   } | 
| 1578 | 0 |   return true; // A normal text line. | 
| 1579 | 0 | } | 
| 1580 |  |  | 
/// Scan a block scalar introduced by '|' or '>' and queue a TK_BlockScalar
/// token for it.
///
/// The token's Range covers the raw source of the scalar body, and its Value
/// holds the collected text with indentation removed and trailing line breaks
/// chomped.
///
/// \param IsLiteral not read by this function; both indicator characters are
///        scanned the same way.
/// \returns false when the header or an indentation check reported an error.
bool Scanner::scanBlockScalar(bool IsLiteral) {
  // Eat '|' or '>'
  assert(*Current == '|' || *Current == '>');
  skip(1);

  // Both are assigned by scanBlockScalarHeader before being read.
  char ChompingIndicator;
  unsigned BlockIndent;
  bool IsDone = false;
  if (!scanBlockScalarHeader(ChompingIndicator, BlockIndent, IsDone))
    return false;
  // The header reached the end of input and already queued the token.
  if (IsDone)
    return true;

  auto Start = Current;
  // A negative Indent is treated as column 0.
  unsigned BlockExitIndent = Indent < 0 ? 0 : (unsigned)Indent;
  unsigned LineBreaks = 0;
  // No explicit indentation indicator: detect it from the first content line.
  if (BlockIndent == 0) {
    if (!findBlockScalarIndent(BlockIndent, BlockExitIndent, LineBreaks,
                               IsDone))
      return false;
  }

  // Scan the block's scalars body.
  SmallString<256> Str;
  while (!IsDone) {
    if (!scanBlockScalarIndent(BlockIndent, BlockExitIndent, IsDone))
      return false;
    if (IsDone)
      break;

    // Parse the current line.
    auto LineStart = Current;
    advanceWhile(&Scanner::skip_nb_char);
    if (LineStart != Current) {
      // Emit the line breaks seen since the previous text, then the line.
      Str.append(LineBreaks, '\n');
      Str.append(StringRef(LineStart, Current - LineStart));
      LineBreaks = 0;
    }

    // Check for EOF.
    if (Current == End)
      break;

    if (!consumeLineBreakIfPresent())
      break;
    ++LineBreaks;
  }

  if (Current == End && !LineBreaks)
    // Ensure that there is at least one line break before the end of file.
    LineBreaks = 1;
  Str.append(getChompedLineBreaks(ChompingIndicator, LineBreaks, Str), '\n');

  // New lines may start a simple key.
  if (!FlowLevel)
    IsSimpleKeyAllowed = true;

  Token T;
  T.Kind = Token::TK_BlockScalar;
  T.Range = StringRef(Start, Current - Start);
  T.Value = std::string(Str);
  TokenQueue.push_back(T);
  return true;
}
| 1645 |  |  | 
| 1646 | 0 | bool Scanner::scanTag() { | 
| 1647 | 0 |   StringRef::iterator Start = Current; | 
| 1648 | 0 |   unsigned ColStart = Column; | 
| 1649 | 0 |   skip(1); // Eat !. | 
| 1650 | 0 |   if (Current == End || isBlankOrBreak(Current)); // An empty tag. | 
| 1651 | 0 |   else if (*Current == '<') { | 
| 1652 | 0 |     skip(1); | 
| 1653 | 0 |     scan_ns_uri_char(); | 
| 1654 | 0 |     if (!consume('>')) | 
| 1655 | 0 |       return false; | 
| 1656 | 0 |   } else { | 
| 1657 | 0 |     // FIXME: Actually parse the c-ns-shorthand-tag rule. | 
| 1658 | 0 |     Current = skip_while(&Scanner::skip_ns_char, Current); | 
| 1659 | 0 |   } | 
| 1660 | 0 | 
 | 
| 1661 | 0 |   Token T; | 
| 1662 | 0 |   T.Kind = Token::TK_Tag; | 
| 1663 | 0 |   T.Range = StringRef(Start, Current - Start); | 
| 1664 | 0 |   TokenQueue.push_back(T); | 
| 1665 | 0 | 
 | 
| 1666 | 0 |   // Tags can be simple keys. | 
| 1667 | 0 |   saveSimpleKeyCandidate(--TokenQueue.end(), ColStart, false); | 
| 1668 | 0 | 
 | 
| 1669 | 0 |   IsSimpleKeyAllowed = false; | 
| 1670 | 0 | 
 | 
| 1671 | 0 |   return true; | 
| 1672 | 0 | } | 
| 1673 |  |  | 
| 1674 | 0 | bool Scanner::fetchMoreTokens() { | 
| 1675 | 0 |   if (IsStartOfStream) | 
| 1676 | 0 |     return scanStreamStart(); | 
| 1677 | 0 |  | 
| 1678 | 0 |   scanToNextToken(); | 
| 1679 | 0 | 
 | 
| 1680 | 0 |   if (Current == End) | 
| 1681 | 0 |     return scanStreamEnd(); | 
| 1682 | 0 |  | 
| 1683 | 0 |   removeStaleSimpleKeyCandidates(); | 
| 1684 | 0 | 
 | 
| 1685 | 0 |   unrollIndent(Column); | 
| 1686 | 0 | 
 | 
| 1687 | 0 |   if (Column == 0 && *Current == '%') | 
| 1688 | 0 |     return scanDirective(); | 
| 1689 | 0 |  | 
| 1690 | 0 |   if (Column == 0 && Current + 4 <= End | 
| 1691 | 0 |       && *Current == '-' | 
| 1692 | 0 |       && *(Current + 1) == '-' | 
| 1693 | 0 |       && *(Current + 2) == '-' | 
| 1694 | 0 |       && (Current + 3 == End || isBlankOrBreak(Current + 3))) | 
| 1695 | 0 |     return scanDocumentIndicator(true); | 
| 1696 | 0 |  | 
| 1697 | 0 |   if (Column == 0 && Current + 4 <= End | 
| 1698 | 0 |       && *Current == '.' | 
| 1699 | 0 |       && *(Current + 1) == '.' | 
| 1700 | 0 |       && *(Current + 2) == '.' | 
| 1701 | 0 |       && (Current + 3 == End || isBlankOrBreak(Current + 3))) | 
| 1702 | 0 |     return scanDocumentIndicator(false); | 
| 1703 | 0 |  | 
| 1704 | 0 |   if (*Current == '[') | 
| 1705 | 0 |     return scanFlowCollectionStart(true); | 
| 1706 | 0 |  | 
| 1707 | 0 |   if (*Current == '{') | 
| 1708 | 0 |     return scanFlowCollectionStart(false); | 
| 1709 | 0 |  | 
| 1710 | 0 |   if (*Current == ']') | 
| 1711 | 0 |     return scanFlowCollectionEnd(true); | 
| 1712 | 0 |  | 
| 1713 | 0 |   if (*Current == '}') | 
| 1714 | 0 |     return scanFlowCollectionEnd(false); | 
| 1715 | 0 |  | 
| 1716 | 0 |   if (*Current == ',') | 
| 1717 | 0 |     return scanFlowEntry(); | 
| 1718 | 0 |  | 
| 1719 | 0 |   if (*Current == '-' && isBlankOrBreak(Current + 1)) | 
| 1720 | 0 |     return scanBlockEntry(); | 
| 1721 | 0 |  | 
| 1722 | 0 |   if (*Current == '?' && (FlowLevel || isBlankOrBreak(Current + 1))) | 
| 1723 | 0 |     return scanKey(); | 
| 1724 | 0 |  | 
| 1725 | 0 |   if (*Current == ':' && (FlowLevel || isBlankOrBreak(Current + 1))) | 
| 1726 | 0 |     return scanValue(); | 
| 1727 | 0 |  | 
| 1728 | 0 |   if (*Current == '*') | 
| 1729 | 0 |     return scanAliasOrAnchor(true); | 
| 1730 | 0 |  | 
| 1731 | 0 |   if (*Current == '&') | 
| 1732 | 0 |     return scanAliasOrAnchor(false); | 
| 1733 | 0 |  | 
| 1734 | 0 |   if (*Current == '!') | 
| 1735 | 0 |     return scanTag(); | 
| 1736 | 0 |  | 
| 1737 | 0 |   if (*Current == '|' && !FlowLevel) | 
| 1738 | 0 |     return scanBlockScalar(true); | 
| 1739 | 0 |  | 
| 1740 | 0 |   if (*Current == '>' && !FlowLevel) | 
| 1741 | 0 |     return scanBlockScalar(false); | 
| 1742 | 0 |  | 
| 1743 | 0 |   if (*Current == '\'') | 
| 1744 | 0 |     return scanFlowScalar(false); | 
| 1745 | 0 |  | 
| 1746 | 0 |   if (*Current == '"') | 
| 1747 | 0 |     return scanFlowScalar(true); | 
| 1748 | 0 |  | 
| 1749 | 0 |   // Get a plain scalar. | 
| 1750 | 0 |   StringRef FirstChar(Current, 1); | 
| 1751 | 0 |   if (!(isBlankOrBreak(Current) | 
| 1752 | 0 |         || FirstChar.find_first_of("-?:,[]{}#&*!|>'\"%@`") != StringRef::npos) | 
| 1753 | 0 |       || (*Current == '-' && !isBlankOrBreak(Current + 1)) | 
| 1754 | 0 |       || (!FlowLevel && (*Current == '?' || *Current == ':') | 
| 1755 | 0 |           && isBlankOrBreak(Current + 1)) | 
| 1756 | 0 |       || (!FlowLevel && *Current == ':' | 
| 1757 | 0 |                       && Current + 2 < End | 
| 1758 | 0 |                       && *(Current + 1) == ':' | 
| 1759 | 0 |                       && !isBlankOrBreak(Current + 2))) | 
| 1760 | 0 |     return scanPlainScalar(); | 
| 1761 | 0 |  | 
| 1762 | 0 |   setError("Unrecognized character while tokenizing.", Current); | 
| 1763 | 0 |   return false; | 
| 1764 | 0 | } | 
| 1765 |  |  | 
/// Create a stream that scans \p Input with a newly allocated Scanner.
///
/// \p SM, \p ShowColors and \p EC are forwarded to the Scanner unchanged.
/// CurrentDoc starts out value-initialized.
Stream::Stream(StringRef Input, SourceMgr &SM, bool ShowColors,
               std::error_code *EC)
    : scanner(new Scanner(Input, SM, ShowColors, EC)), CurrentDoc() {}

/// Create a stream that scans \p InputBuffer with a newly allocated Scanner.
///
/// \p SM, \p ShowColors and \p EC are forwarded to the Scanner unchanged.
/// CurrentDoc starts out value-initialized.
Stream::Stream(MemoryBufferRef InputBuffer, SourceMgr &SM, bool ShowColors,
               std::error_code *EC)
    : scanner(new Scanner(InputBuffer, SM, ShowColors, EC)), CurrentDoc() {}

// Defaulted here, in the .cpp file, rather than in the class definition.
Stream::~Stream() = default;

/// \returns whatever the underlying scanner reports from failed().
bool Stream::failed() { return scanner->failed(); }
| 1777 |  |  | 
| 1778 | 0 | void Stream::printError(Node *N, const Twine &Msg) { | 
| 1779 | 0 |   SMRange Range = N ? N->getSourceRange() : SMRange(); | 
| 1780 | 0 |   scanner->printError( Range.Start | 
| 1781 | 0 |                      , SourceMgr::DK_Error | 
| 1782 | 0 |                      , Msg | 
| 1783 | 0 |                      , Range); | 
| 1784 | 0 | } | 
| 1785 |  |  | 
/// Start iterating over the documents of this stream.
///
/// Calls report_fatal_error when a current document already exists, i.e. the
/// stream can only be iterated once.
///
/// \returns an iterator referring to a newly created Document.
document_iterator Stream::begin() {
  if (CurrentDoc)
    report_fatal_error("Can only iterate over the stream once");

  // Skip Stream-Start.
  scanner->getNext();

  CurrentDoc.reset(new Document(*this));
  return document_iterator(CurrentDoc);
}

/// \returns a default-constructed document_iterator, used as the end marker.
document_iterator Stream::end() {
  return document_iterator();
}
| 1800 |  |  | 
| 1801 | 0 | void Stream::skip() { | 
| 1802 | 0 |   for (document_iterator i = begin(), e = end(); i != e; ++i) | 
| 1803 | 0 |     i->skip(); | 
| 1804 | 0 | } | 
| 1805 |  |  | 
/// Construct a node of kind \p Type belonging to document \p D.
///
/// \param A the node's anchor text.
/// \param T the node's tag text.
///
/// The source range starts out empty, positioned at the beginning of the
/// next token that has not been consumed yet.
Node::Node(unsigned int Type, std::unique_ptr<Document> &D, StringRef A,
           StringRef T)
    : Doc(D), TypeID(Type), Anchor(A), Tag(T) {
  SMLoc Start = SMLoc::getFromPointer(peekNext().Range.begin());
  SourceRange = SMRange(Start, Start);
}
| 1812 |  |  | 
| 1813 | 0 | std::string Node::getVerbatimTag() const { | 
| 1814 | 0 |   StringRef Raw = getRawTag(); | 
| 1815 | 0 |   if (!Raw.empty() && Raw != "!") { | 
| 1816 | 0 |     std::string Ret; | 
| 1817 | 0 |     if (Raw.find_last_of('!') == 0) { | 
| 1818 | 0 |       Ret = std::string(Doc->getTagMap().find("!")->second); | 
| 1819 | 0 |       Ret += Raw.substr(1); | 
| 1820 | 0 |       return Ret; | 
| 1821 | 0 |     } else if (Raw.startswith("!!")) { | 
| 1822 | 0 |       Ret = std::string(Doc->getTagMap().find("!!")->second); | 
| 1823 | 0 |       Ret += Raw.substr(2); | 
| 1824 | 0 |       return Ret; | 
| 1825 | 0 |     } else { | 
| 1826 | 0 |       StringRef TagHandle = Raw.substr(0, Raw.find_last_of('!') + 1); | 
| 1827 | 0 |       std::map<StringRef, StringRef>::const_iterator It = | 
| 1828 | 0 |           Doc->getTagMap().find(TagHandle); | 
| 1829 | 0 |       if (It != Doc->getTagMap().end()) | 
| 1830 | 0 |         Ret = std::string(It->second); | 
| 1831 | 0 |       else { | 
| 1832 | 0 |         Token T; | 
| 1833 | 0 |         T.Kind = Token::TK_Tag; | 
| 1834 | 0 |         T.Range = TagHandle; | 
| 1835 | 0 |         setError(Twine("Unknown tag handle ") + TagHandle, T); | 
| 1836 | 0 |       } | 
| 1837 | 0 |       Ret += Raw.substr(Raw.find_last_of('!') + 1); | 
| 1838 | 0 |       return Ret; | 
| 1839 | 0 |     } | 
| 1840 | 0 |   } | 
| 1841 | 0 | 
 | 
| 1842 | 0 |   switch (getType()) { | 
| 1843 | 0 |   case NK_Null: | 
| 1844 | 0 |     return "tag:yaml.org,2002:null"; | 
| 1845 | 0 |   case NK_Scalar: | 
| 1846 | 0 |   case NK_BlockScalar: | 
| 1847 | 0 |     // TODO: Tag resolution. | 
| 1848 | 0 |     return "tag:yaml.org,2002:str"; | 
| 1849 | 0 |   case NK_Mapping: | 
| 1850 | 0 |     return "tag:yaml.org,2002:map"; | 
| 1851 | 0 |   case NK_Sequence: | 
| 1852 | 0 |     return "tag:yaml.org,2002:seq"; | 
| 1853 | 0 |   } | 
| 1854 | 0 |  | 
| 1855 | 0 |   return ""; | 
| 1856 | 0 | } | 
| 1857 |  |  | 
// The Node members below all forward to the Document this node belongs to.

/// \returns the result of Doc->peekNext().
Token &Node::peekNext() {
  return Doc->peekNext();
}

/// \returns the result of Doc->getNext().
Token Node::getNext() {
  return Doc->getNext();
}

/// \returns the result of Doc->parseBlockNode().
Node *Node::parseBlockNode() {
  return Doc->parseBlockNode();
}

/// \returns the owning document's NodeAllocator.
BumpPtrAllocator &Node::getAllocator() {
  return Doc->NodeAllocator;
}

/// Forward \p Msg and \p Tok to Doc->setError().
void Node::setError(const Twine &Msg, Token &Tok) const {
  Doc->setError(Msg, Tok);
}

/// \returns the result of Doc->failed().
bool Node::failed() const {
  return Doc->failed();
}
| 1881 |  |  | 
| 1882 | 0 | StringRef ScalarNode::getValue(SmallVectorImpl<char> &Storage) const { | 
| 1883 | 0 |   // TODO: Handle newlines properly. We need to remove leading whitespace. | 
| 1884 | 0 |   if (Value[0] == '"') { // Double quoted. | 
| 1885 | 0 |     // Pull off the leading and trailing "s. | 
| 1886 | 0 |     StringRef UnquotedValue = Value.substr(1, Value.size() - 2); | 
| 1887 | 0 |     // Search for characters that would require unescaping the value. | 
| 1888 | 0 |     StringRef::size_type i = UnquotedValue.find_first_of("\\\r\n"); | 
| 1889 | 0 |     if (i != StringRef::npos) | 
| 1890 | 0 |       return unescapeDoubleQuoted(UnquotedValue, i, Storage); | 
| 1891 | 0 |     return UnquotedValue; | 
| 1892 | 0 |   } else if (Value[0] == '\'') { // Single quoted. | 
| 1893 | 0 |     // Pull off the leading and trailing 's. | 
| 1894 | 0 |     StringRef UnquotedValue = Value.substr(1, Value.size() - 2); | 
| 1895 | 0 |     StringRef::size_type i = UnquotedValue.find('\''); | 
| 1896 | 0 |     if (i != StringRef::npos) { | 
| 1897 | 0 |       // We're going to need Storage. | 
| 1898 | 0 |       Storage.clear(); | 
| 1899 | 0 |       Storage.reserve(UnquotedValue.size()); | 
| 1900 | 0 |       for (; i != StringRef::npos; i = UnquotedValue.find('\'')) { | 
| 1901 | 0 |         StringRef Valid(UnquotedValue.begin(), i); | 
| 1902 | 0 |         Storage.insert(Storage.end(), Valid.begin(), Valid.end()); | 
| 1903 | 0 |         Storage.push_back('\''); | 
| 1904 | 0 |         UnquotedValue = UnquotedValue.substr(i + 2); | 
| 1905 | 0 |       } | 
| 1906 | 0 |       Storage.insert(Storage.end(), UnquotedValue.begin(), UnquotedValue.end()); | 
| 1907 | 0 |       return StringRef(Storage.begin(), Storage.size()); | 
| 1908 | 0 |     } | 
| 1909 | 0 |     return UnquotedValue; | 
| 1910 | 0 |   } | 
| 1911 | 0 |   // Plain or block. | 
| 1912 | 0 |   return Value.rtrim(' '); | 
| 1913 | 0 | } | 
| 1914 |  |  | 
/// Build the unescaped form of a double-quoted scalar in \p Storage.
///
/// \param UnquotedValue the scalar text without its surrounding quotes.
/// \param i index in \p UnquotedValue of the first backslash, '\r' or '\n'.
/// \param Storage output buffer; cleared first, then filled with the result.
/// \returns a view of \p Storage, or an empty string after calling setError
/// when a backslash is the last character or is followed by a character that
/// is not a known escape code.
StringRef ScalarNode::unescapeDoubleQuoted( StringRef UnquotedValue
                                          , StringRef::size_type i
                                          , SmallVectorImpl<char> &Storage)
                                          const {
  // Use Storage to build proper value.
  Storage.clear();
  Storage.reserve(UnquotedValue.size());
  for (; i != StringRef::npos; i = UnquotedValue.find_first_of("\\\r\n")) {
    // Insert all previous chars into Storage.
    StringRef Valid(UnquotedValue.begin(), i);
    Storage.insert(Storage.end(), Valid.begin(), Valid.end());
    // Chop off inserted chars.
    UnquotedValue = UnquotedValue.substr(i);

    assert(!UnquotedValue.empty() && "Can't be empty!");

    // Parse escape or line break.
    switch (UnquotedValue[0]) {
    case '\r':
    case '\n':
      // An unescaped line break becomes a single '\n'. If the next character
      // is also '\r' or '\n', it is consumed together with this one.
      Storage.push_back('\n');
      if (   UnquotedValue.size() > 1
          && (UnquotedValue[1] == '\r' || UnquotedValue[1] == '\n'))
        UnquotedValue = UnquotedValue.substr(1);
      UnquotedValue = UnquotedValue.substr(1);
      break;
    default:
      // The only remaining character the search can stop at is a backslash.
      if (UnquotedValue.size() == 1) {
        // A backslash with nothing after it.
        Token T;
        T.Range = StringRef(UnquotedValue.begin(), 1);
        setError("Unrecognized escape code", T);
        return "";
      }
      // Step over the backslash; UnquotedValue[0] is now the escape code.
      UnquotedValue = UnquotedValue.substr(1);
      switch (UnquotedValue[0]) {
      default: {
          Token T;
          T.Range = StringRef(UnquotedValue.begin(), 1);
          setError("Unrecognized escape code", T);
          return "";
        }
      case '\r':
      case '\n':
        // Remove the new line.
        if (   UnquotedValue.size() > 1
            && (UnquotedValue[1] == '\r' || UnquotedValue[1] == '\n'))
          UnquotedValue = UnquotedValue.substr(1);
        // If this was just a single byte newline, it will get skipped
        // below.
        break;
      case '0':
        Storage.push_back(0x00);
        break;
      case 'a':
        Storage.push_back(0x07);
        break;
      case 'b':
        Storage.push_back(0x08);
        break;
      case 't':
      case 0x09:
        Storage.push_back(0x09);
        break;
      case 'n':
        Storage.push_back(0x0A);
        break;
      case 'v':
        Storage.push_back(0x0B);
        break;
      case 'f':
        Storage.push_back(0x0C);
        break;
      case 'r':
        Storage.push_back(0x0D);
        break;
      case 'e':
        Storage.push_back(0x1B);
        break;
      case ' ':
        Storage.push_back(0x20);
        break;
      case '"':
        Storage.push_back(0x22);
        break;
      case '/':
        Storage.push_back(0x2F);
        break;
      case '\\':
        Storage.push_back(0x5C);
        break;
      case 'N':
        encodeUTF8(0x85, Storage);
        break;
      case '_':
        encodeUTF8(0xA0, Storage);
        break;
      case 'L':
        encodeUTF8(0x2028, Storage);
        break;
      case 'P':
        encodeUTF8(0x2029, Storage);
        break;
      case 'x': {
          // \xHH: two hex digits. When fewer than two characters follow,
          // nothing is emitted and only the 'x' is skipped below.
          if (UnquotedValue.size() < 3)
            // TODO: Report error.
            break;
          unsigned int UnicodeScalarValue;
          // getAsInteger returning true selects the 0xFFFD substitute.
          if (UnquotedValue.substr(1, 2).getAsInteger(16, UnicodeScalarValue))
            // TODO: Report error.
            UnicodeScalarValue = 0xFFFD;
          encodeUTF8(UnicodeScalarValue, Storage);
          // Skip the digits; the escape code itself is skipped below.
          UnquotedValue = UnquotedValue.substr(2);
          break;
        }
      case 'u': {
          // \uHHHH: four hex digits, handled like \x above.
          if (UnquotedValue.size() < 5)
            // TODO: Report error.
            break;
          unsigned int UnicodeScalarValue;
          if (UnquotedValue.substr(1, 4).getAsInteger(16, UnicodeScalarValue))
            // TODO: Report error.
            UnicodeScalarValue = 0xFFFD;
          encodeUTF8(UnicodeScalarValue, Storage);
          UnquotedValue = UnquotedValue.substr(4);
          break;
        }
      case 'U': {
          // \UHHHHHHHH: eight hex digits, handled like \x above.
          if (UnquotedValue.size() < 9)
            // TODO: Report error.
            break;
          unsigned int UnicodeScalarValue;
          if (UnquotedValue.substr(1, 8).getAsInteger(16, UnicodeScalarValue))
            // TODO: Report error.
            UnicodeScalarValue = 0xFFFD;
          encodeUTF8(UnicodeScalarValue, Storage);
          UnquotedValue = UnquotedValue.substr(8);
          break;
        }
      }
      // Skip the escape code character (or the last byte of an escaped
      // line break).
      UnquotedValue = UnquotedValue.substr(1);
    }
  }
  // Append whatever is left after the last escape or line break.
  Storage.insert(Storage.end(), UnquotedValue.begin(), UnquotedValue.end());
  return StringRef(Storage.begin(), Storage.size());
}
| 2060 |  |  | 
| 2061 | 0 | Node *KeyValueNode::getKey() { | 
| 2062 | 0 |   if (Key) | 
| 2063 | 0 |     return Key; | 
| 2064 | 0 |   // Handle implicit null keys. | 
| 2065 | 0 |   { | 
| 2066 | 0 |     Token &t = peekNext(); | 
| 2067 | 0 |     if (   t.Kind == Token::TK_BlockEnd | 
| 2068 | 0 |         || t.Kind == Token::TK_Value | 
| 2069 | 0 |         || t.Kind == Token::TK_Error) { | 
| 2070 | 0 |       return Key = new (getAllocator()) NullNode(Doc); | 
| 2071 | 0 |     } | 
| 2072 | 0 |     if (t.Kind == Token::TK_Key) | 
| 2073 | 0 |       getNext(); // skip TK_Key. | 
| 2074 | 0 |   } | 
| 2075 | 0 | 
 | 
| 2076 | 0 |   // Handle explicit null keys. | 
| 2077 | 0 |   Token &t = peekNext(); | 
| 2078 | 0 |   if (t.Kind == Token::TK_BlockEnd || t.Kind == Token::TK_Value) { | 
| 2079 | 0 |     return Key = new (getAllocator()) NullNode(Doc); | 
| 2080 | 0 |   } | 
| 2081 | 0 |  | 
| 2082 | 0 |   // We've got a normal key. | 
| 2083 | 0 |   return Key = parseBlockNode(); | 
| 2084 | 0 | } | 
| 2085 |  |  | 
| 2086 | 0 | Node *KeyValueNode::getValue() { | 
| 2087 | 0 |   if (Value) | 
| 2088 | 0 |     return Value; | 
| 2089 | 0 |  | 
| 2090 | 0 |   if (Node* Key = getKey()) | 
| 2091 | 0 |     Key->skip(); | 
| 2092 | 0 |   else { | 
| 2093 | 0 |     setError("Null key in Key Value.", peekNext()); | 
| 2094 | 0 |     return Value = new (getAllocator()) NullNode(Doc); | 
| 2095 | 0 |   } | 
| 2096 | 0 |  | 
| 2097 | 0 |   if (failed()) | 
| 2098 | 0 |     return Value = new (getAllocator()) NullNode(Doc); | 
| 2099 | 0 |  | 
| 2100 | 0 |   // Handle implicit null values. | 
| 2101 | 0 |   { | 
| 2102 | 0 |     Token &t = peekNext(); | 
| 2103 | 0 |     if (   t.Kind == Token::TK_BlockEnd | 
| 2104 | 0 |         || t.Kind == Token::TK_FlowMappingEnd | 
| 2105 | 0 |         || t.Kind == Token::TK_Key | 
| 2106 | 0 |         || t.Kind == Token::TK_FlowEntry | 
| 2107 | 0 |         || t.Kind == Token::TK_Error) { | 
| 2108 | 0 |       return Value = new (getAllocator()) NullNode(Doc); | 
| 2109 | 0 |     } | 
| 2110 | 0 |  | 
| 2111 | 0 |     if (t.Kind != Token::TK_Value) { | 
| 2112 | 0 |       setError("Unexpected token in Key Value.", t); | 
| 2113 | 0 |       return Value = new (getAllocator()) NullNode(Doc); | 
| 2114 | 0 |     } | 
| 2115 | 0 |     getNext(); // skip TK_Value. | 
| 2116 | 0 |   } | 
| 2117 | 0 | 
 | 
| 2118 | 0 |   // Handle explicit null values. | 
| 2119 | 0 |   Token &t = peekNext(); | 
| 2120 | 0 |   if (t.Kind == Token::TK_BlockEnd || t.Kind == Token::TK_Key) { | 
| 2121 | 0 |     return Value = new (getAllocator()) NullNode(Doc); | 
| 2122 | 0 |   } | 
| 2123 | 0 |  | 
| 2124 | 0 |   // We got a normal value. | 
| 2125 | 0 |   return Value = parseBlockNode(); | 
| 2126 | 0 | } | 
| 2127 |  |  | 
| 2128 | 0 | void MappingNode::increment() { | 
| 2129 | 0 |   if (failed()) { | 
| 2130 | 0 |     IsAtEnd = true; | 
| 2131 | 0 |     CurrentEntry = nullptr; | 
| 2132 | 0 |     return; | 
| 2133 | 0 |   } | 
| 2134 | 0 |   if (CurrentEntry) { | 
| 2135 | 0 |     CurrentEntry->skip(); | 
| 2136 | 0 |     if (Type == MT_Inline) { | 
| 2137 | 0 |       IsAtEnd = true; | 
| 2138 | 0 |       CurrentEntry = nullptr; | 
| 2139 | 0 |       return; | 
| 2140 | 0 |     } | 
| 2141 | 0 |   } | 
| 2142 | 0 |   Token T = peekNext(); | 
| 2143 | 0 |   if (T.Kind == Token::TK_Key || T.Kind == Token::TK_Scalar) { | 
| 2144 | 0 |     // KeyValueNode eats the TK_Key. That way it can detect null keys. | 
| 2145 | 0 |     CurrentEntry = new (getAllocator()) KeyValueNode(Doc); | 
| 2146 | 0 |   } else if (Type == MT_Block) { | 
| 2147 | 0 |     switch (T.Kind) { | 
| 2148 | 0 |     case Token::TK_BlockEnd: | 
| 2149 | 0 |       getNext(); | 
| 2150 | 0 |       IsAtEnd = true; | 
| 2151 | 0 |       CurrentEntry = nullptr; | 
| 2152 | 0 |       break; | 
| 2153 | 0 |     default: | 
| 2154 | 0 |       setError("Unexpected token. Expected Key or Block End", T); | 
| 2155 | 0 |       LLVM_FALLTHROUGH; | 
| 2156 | 0 |     case Token::TK_Error: | 
| 2157 | 0 |       IsAtEnd = true; | 
| 2158 | 0 |       CurrentEntry = nullptr; | 
| 2159 | 0 |     } | 
| 2160 | 0 |   } else { | 
| 2161 | 0 |     switch (T.Kind) { | 
| 2162 | 0 |     case Token::TK_FlowEntry: | 
| 2163 | 0 |       // Eat the flow entry and recurse. | 
| 2164 | 0 |       getNext(); | 
| 2165 | 0 |       return increment(); | 
| 2166 | 0 |     case Token::TK_FlowMappingEnd: | 
| 2167 | 0 |       getNext(); | 
| 2168 | 0 |       LLVM_FALLTHROUGH; | 
| 2169 | 0 |     case Token::TK_Error: | 
| 2170 | 0 |       // Set this to end iterator. | 
| 2171 | 0 |       IsAtEnd = true; | 
| 2172 | 0 |       CurrentEntry = nullptr; | 
| 2173 | 0 |       break; | 
| 2174 | 0 |     default: | 
| 2175 | 0 |       setError( "Unexpected token. Expected Key, Flow Entry, or Flow " | 
| 2176 | 0 |                 "Mapping End." | 
| 2177 | 0 |               , T); | 
| 2178 | 0 |       IsAtEnd = true; | 
| 2179 | 0 |       CurrentEntry = nullptr; | 
| 2180 | 0 |     } | 
| 2181 | 0 |   } | 
| 2182 | 0 | } | 
| 2183 |  |  | 
| 2184 | 0 | void SequenceNode::increment() { | 
| 2185 | 0 |   if (failed()) { | 
| 2186 | 0 |     IsAtEnd = true; | 
| 2187 | 0 |     CurrentEntry = nullptr; | 
| 2188 | 0 |     return; | 
| 2189 | 0 |   } | 
| 2190 | 0 |   if (CurrentEntry) | 
| 2191 | 0 |     CurrentEntry->skip(); | 
| 2192 | 0 |   Token T = peekNext(); | 
| 2193 | 0 |   if (SeqType == ST_Block) { | 
| 2194 | 0 |     switch (T.Kind) { | 
| 2195 | 0 |     case Token::TK_BlockEntry: | 
| 2196 | 0 |       getNext(); | 
| 2197 | 0 |       CurrentEntry = parseBlockNode(); | 
| 2198 | 0 |       if (!CurrentEntry) { // An error occurred. | 
| 2199 | 0 |         IsAtEnd = true; | 
| 2200 | 0 |         CurrentEntry = nullptr; | 
| 2201 | 0 |       } | 
| 2202 | 0 |       break; | 
| 2203 | 0 |     case Token::TK_BlockEnd: | 
| 2204 | 0 |       getNext(); | 
| 2205 | 0 |       IsAtEnd = true; | 
| 2206 | 0 |       CurrentEntry = nullptr; | 
| 2207 | 0 |       break; | 
| 2208 | 0 |     default: | 
| 2209 | 0 |       setError( "Unexpected token. Expected Block Entry or Block End." | 
| 2210 | 0 |               , T); | 
| 2211 | 0 |       LLVM_FALLTHROUGH; | 
| 2212 | 0 |     case Token::TK_Error: | 
| 2213 | 0 |       IsAtEnd = true; | 
| 2214 | 0 |       CurrentEntry = nullptr; | 
| 2215 | 0 |     } | 
| 2216 | 0 |   } else if (SeqType == ST_Indentless) { | 
| 2217 | 0 |     switch (T.Kind) { | 
| 2218 | 0 |     case Token::TK_BlockEntry: | 
| 2219 | 0 |       getNext(); | 
| 2220 | 0 |       CurrentEntry = parseBlockNode(); | 
| 2221 | 0 |       if (!CurrentEntry) { // An error occurred. | 
| 2222 | 0 |         IsAtEnd = true; | 
| 2223 | 0 |         CurrentEntry = nullptr; | 
| 2224 | 0 |       } | 
| 2225 | 0 |       break; | 
| 2226 | 0 |     default: | 
| 2227 | 0 |     case Token::TK_Error: | 
| 2228 | 0 |       IsAtEnd = true; | 
| 2229 | 0 |       CurrentEntry = nullptr; | 
| 2230 | 0 |     } | 
| 2231 | 0 |   } else if (SeqType == ST_Flow) { | 
| 2232 | 0 |     switch (T.Kind) { | 
| 2233 | 0 |     case Token::TK_FlowEntry: | 
| 2234 | 0 |       // Eat the flow entry and recurse. | 
| 2235 | 0 |       getNext(); | 
| 2236 | 0 |       WasPreviousTokenFlowEntry = true; | 
| 2237 | 0 |       return increment(); | 
| 2238 | 0 |     case Token::TK_FlowSequenceEnd: | 
| 2239 | 0 |       getNext(); | 
| 2240 | 0 |       LLVM_FALLTHROUGH; | 
| 2241 | 0 |     case Token::TK_Error: | 
| 2242 | 0 |       // Set this to end iterator. | 
| 2243 | 0 |       IsAtEnd = true; | 
| 2244 | 0 |       CurrentEntry = nullptr; | 
| 2245 | 0 |       break; | 
| 2246 | 0 |     case Token::TK_StreamEnd: | 
| 2247 | 0 |     case Token::TK_DocumentEnd: | 
| 2248 | 0 |     case Token::TK_DocumentStart: | 
| 2249 | 0 |       setError("Could not find closing ]!", T); | 
| 2250 | 0 |       // Set this to end iterator. | 
| 2251 | 0 |       IsAtEnd = true; | 
| 2252 | 0 |       CurrentEntry = nullptr; | 
| 2253 | 0 |       break; | 
| 2254 | 0 |     default: | 
| 2255 | 0 |       if (!WasPreviousTokenFlowEntry) { | 
| 2256 | 0 |         setError("Expected , between entries!", T); | 
| 2257 | 0 |         IsAtEnd = true; | 
| 2258 | 0 |         CurrentEntry = nullptr; | 
| 2259 | 0 |         break; | 
| 2260 | 0 |       } | 
| 2261 | 0 |       // Otherwise it must be a flow entry. | 
| 2262 | 0 |       CurrentEntry = parseBlockNode(); | 
| 2263 | 0 |       if (!CurrentEntry) { | 
| 2264 | 0 |         IsAtEnd = true; | 
| 2265 | 0 |       } | 
| 2266 | 0 |       WasPreviousTokenFlowEntry = false; | 
| 2267 | 0 |       break; | 
| 2268 | 0 |     } | 
| 2269 | 0 |   } | 
| 2270 | 0 | } | 
| 2271 |  |  | 
| 2272 | 0 | Document::Document(Stream &S) : stream(S), Root(nullptr) { | 
| 2273 | 0 |   // Tag maps starts with two default mappings. | 
| 2274 | 0 |   TagMap["!"] = "!"; | 
| 2275 | 0 |   TagMap["!!"] = "tag:yaml.org,2002:"; | 
| 2276 | 0 | 
 | 
| 2277 | 0 |   if (parseDirectives()) | 
| 2278 | 0 |     expectToken(Token::TK_DocumentStart); | 
| 2279 | 0 |   Token &T = peekNext(); | 
| 2280 | 0 |   if (T.Kind == Token::TK_DocumentStart) | 
| 2281 | 0 |     getNext(); | 
| 2282 | 0 | } | 
| 2283 |  |  | 
| 2284 | 0 | bool Document::skip()  { | 
| 2285 | 0 |   if (stream.scanner->failed()) | 
| 2286 | 0 |     return false; | 
| 2287 | 0 |   if (!Root && !getRoot()) | 
| 2288 | 0 |     return false; | 
| 2289 | 0 |   Root->skip(); | 
| 2290 | 0 |   Token &T = peekNext(); | 
| 2291 | 0 |   if (T.Kind == Token::TK_StreamEnd) | 
| 2292 | 0 |     return false; | 
| 2293 | 0 |   if (T.Kind == Token::TK_DocumentEnd) { | 
| 2294 | 0 |     getNext(); | 
| 2295 | 0 |     return skip(); | 
| 2296 | 0 |   } | 
| 2297 | 0 |   return true; | 
| 2298 | 0 | } | 
| 2299 |  |  | 
| 2300 | 0 | Token &Document::peekNext() { | 
| 2301 | 0 |   return stream.scanner->peekNext(); | 
| 2302 | 0 | } | 
| 2303 |  |  | 
| 2304 | 0 | Token Document::getNext() { | 
| 2305 | 0 |   return stream.scanner->getNext(); | 
| 2306 | 0 | } | 
| 2307 |  |  | 
| 2308 | 0 | void Document::setError(const Twine &Message, Token &Location) const { | 
| 2309 | 0 |   stream.scanner->setError(Message, Location.Range.begin()); | 
| 2310 | 0 | } | 
| 2311 |  |  | 
| 2312 | 0 | bool Document::failed() const { | 
| 2313 | 0 |   return stream.scanner->failed(); | 
| 2314 | 0 | } | 
| 2315 |  |  | 
| 2316 | 0 | Node *Document::parseBlockNode() { | 
| 2317 | 0 |   Token T = peekNext(); | 
| 2318 | 0 |   // Handle properties. | 
| 2319 | 0 |   Token AnchorInfo; | 
| 2320 | 0 |   Token TagInfo; | 
| 2321 | 0 | parse_property: | 
| 2322 | 0 |   switch (T.Kind) { | 
| 2323 | 0 |   case Token::TK_Alias: | 
| 2324 | 0 |     getNext(); | 
| 2325 | 0 |     return new (NodeAllocator) AliasNode(stream.CurrentDoc, T.Range.substr(1)); | 
| 2326 | 0 |   case Token::TK_Anchor: | 
| 2327 | 0 |     if (AnchorInfo.Kind == Token::TK_Anchor) { | 
| 2328 | 0 |       setError("Already encountered an anchor for this node!", T); | 
| 2329 | 0 |       return nullptr; | 
| 2330 | 0 |     } | 
| 2331 | 0 |     AnchorInfo = getNext(); // Consume TK_Anchor. | 
| 2332 | 0 |     T = peekNext(); | 
| 2333 | 0 |     goto parse_property; | 
| 2334 | 0 |   case Token::TK_Tag: | 
| 2335 | 0 |     if (TagInfo.Kind == Token::TK_Tag) { | 
| 2336 | 0 |       setError("Already encountered a tag for this node!", T); | 
| 2337 | 0 |       return nullptr; | 
| 2338 | 0 |     } | 
| 2339 | 0 |     TagInfo = getNext(); // Consume TK_Tag. | 
| 2340 | 0 |     T = peekNext(); | 
| 2341 | 0 |     goto parse_property; | 
| 2342 | 0 |   default: | 
| 2343 | 0 |     break; | 
| 2344 | 0 |   } | 
| 2345 | 0 |  | 
| 2346 | 0 |   switch (T.Kind) { | 
| 2347 | 0 |   case Token::TK_BlockEntry: | 
| 2348 | 0 |     // We got an unindented BlockEntry sequence. This is not terminated with | 
| 2349 | 0 |     // a BlockEnd. | 
| 2350 | 0 |     // Don't eat the TK_BlockEntry, SequenceNode needs it. | 
| 2351 | 0 |     return new (NodeAllocator) SequenceNode( stream.CurrentDoc | 
| 2352 | 0 |                                            , AnchorInfo.Range.substr(1) | 
| 2353 | 0 |                                            , TagInfo.Range | 
| 2354 | 0 |                                            , SequenceNode::ST_Indentless); | 
| 2355 | 0 |   case Token::TK_BlockSequenceStart: | 
| 2356 | 0 |     getNext(); | 
| 2357 | 0 |     return new (NodeAllocator) | 
| 2358 | 0 |       SequenceNode( stream.CurrentDoc | 
| 2359 | 0 |                   , AnchorInfo.Range.substr(1) | 
| 2360 | 0 |                   , TagInfo.Range | 
| 2361 | 0 |                   , SequenceNode::ST_Block); | 
| 2362 | 0 |   case Token::TK_BlockMappingStart: | 
| 2363 | 0 |     getNext(); | 
| 2364 | 0 |     return new (NodeAllocator) | 
| 2365 | 0 |       MappingNode( stream.CurrentDoc | 
| 2366 | 0 |                  , AnchorInfo.Range.substr(1) | 
| 2367 | 0 |                  , TagInfo.Range | 
| 2368 | 0 |                  , MappingNode::MT_Block); | 
| 2369 | 0 |   case Token::TK_FlowSequenceStart: | 
| 2370 | 0 |     getNext(); | 
| 2371 | 0 |     return new (NodeAllocator) | 
| 2372 | 0 |       SequenceNode( stream.CurrentDoc | 
| 2373 | 0 |                   , AnchorInfo.Range.substr(1) | 
| 2374 | 0 |                   , TagInfo.Range | 
| 2375 | 0 |                   , SequenceNode::ST_Flow); | 
| 2376 | 0 |   case Token::TK_FlowMappingStart: | 
| 2377 | 0 |     getNext(); | 
| 2378 | 0 |     return new (NodeAllocator) | 
| 2379 | 0 |       MappingNode( stream.CurrentDoc | 
| 2380 | 0 |                  , AnchorInfo.Range.substr(1) | 
| 2381 | 0 |                  , TagInfo.Range | 
| 2382 | 0 |                  , MappingNode::MT_Flow); | 
| 2383 | 0 |   case Token::TK_Scalar: | 
| 2384 | 0 |     getNext(); | 
| 2385 | 0 |     return new (NodeAllocator) | 
| 2386 | 0 |       ScalarNode( stream.CurrentDoc | 
| 2387 | 0 |                 , AnchorInfo.Range.substr(1) | 
| 2388 | 0 |                 , TagInfo.Range | 
| 2389 | 0 |                 , T.Range); | 
| 2390 | 0 |   case Token::TK_BlockScalar: { | 
| 2391 | 0 |     getNext(); | 
| 2392 | 0 |     StringRef NullTerminatedStr(T.Value.c_str(), T.Value.length() + 1); | 
| 2393 | 0 |     StringRef StrCopy = NullTerminatedStr.copy(NodeAllocator).drop_back(); | 
| 2394 | 0 |     return new (NodeAllocator) | 
| 2395 | 0 |         BlockScalarNode(stream.CurrentDoc, AnchorInfo.Range.substr(1), | 
| 2396 | 0 |                         TagInfo.Range, StrCopy, T.Range); | 
| 2397 | 0 |   } | 
| 2398 | 0 |   case Token::TK_Key: | 
| 2399 | 0 |     // Don't eat the TK_Key, KeyValueNode expects it. | 
| 2400 | 0 |     return new (NodeAllocator) | 
| 2401 | 0 |       MappingNode( stream.CurrentDoc | 
| 2402 | 0 |                  , AnchorInfo.Range.substr(1) | 
| 2403 | 0 |                  , TagInfo.Range | 
| 2404 | 0 |                  , MappingNode::MT_Inline); | 
| 2405 | 0 |   case Token::TK_DocumentStart: | 
| 2406 | 0 |   case Token::TK_DocumentEnd: | 
| 2407 | 0 |   case Token::TK_StreamEnd: | 
| 2408 | 0 |   default: | 
| 2409 | 0 |     // TODO: Properly handle tags. "[!!str ]" should resolve to !!str "", not | 
| 2410 | 0 |     //       !!null null. | 
| 2411 | 0 |     return new (NodeAllocator) NullNode(stream.CurrentDoc); | 
| 2412 | 0 |   case Token::TK_FlowMappingEnd: | 
| 2413 | 0 |   case Token::TK_FlowSequenceEnd: | 
| 2414 | 0 |   case Token::TK_FlowEntry: { | 
| 2415 | 0 |     if (Root && (isa<MappingNode>(Root) || isa<SequenceNode>(Root))) | 
| 2416 | 0 |       return new (NodeAllocator) NullNode(stream.CurrentDoc); | 
| 2417 | 0 |  | 
| 2418 | 0 |     setError("Unexpected token", T); | 
| 2419 | 0 |     return nullptr; | 
| 2420 | 0 |   } | 
| 2421 | 0 |   case Token::TK_Error: | 
| 2422 | 0 |     return nullptr; | 
| 2423 | 0 |   } | 
| 2424 | 0 |   llvm_unreachable("Control flow shouldn't reach here."); | 
| 2425 | 0 |   return nullptr; | 
| 2426 | 0 | } | 
| 2427 |  |  | 
| 2428 | 0 | bool Document::parseDirectives() { | 
| 2429 | 0 |   bool isDirective = false; | 
| 2430 | 0 |   while (true) { | 
| 2431 | 0 |     Token T = peekNext(); | 
| 2432 | 0 |     if (T.Kind == Token::TK_TagDirective) { | 
| 2433 | 0 |       parseTAGDirective(); | 
| 2434 | 0 |       isDirective = true; | 
| 2435 | 0 |     } else if (T.Kind == Token::TK_VersionDirective) { | 
| 2436 | 0 |       parseYAMLDirective(); | 
| 2437 | 0 |       isDirective = true; | 
| 2438 | 0 |     } else | 
| 2439 | 0 |       break; | 
| 2440 | 0 |   } | 
| 2441 | 0 |   return isDirective; | 
| 2442 | 0 | } | 
| 2443 |  |  | 
| 2444 | 0 | void Document::parseYAMLDirective() { | 
| 2445 | 0 |   getNext(); // Eat %YAML <version> | 
| 2446 | 0 | } | 
| 2447 |  |  | 
| 2448 | 0 | void Document::parseTAGDirective() { | 
| 2449 | 0 |   Token Tag = getNext(); // %TAG <handle> <prefix> | 
| 2450 | 0 |   StringRef T = Tag.Range; | 
| 2451 | 0 |   // Strip %TAG | 
| 2452 | 0 |   T = T.substr(T.find_first_of(" \t")).ltrim(" \t"); | 
| 2453 | 0 |   std::size_t HandleEnd = T.find_first_of(" \t"); | 
| 2454 | 0 |   StringRef TagHandle = T.substr(0, HandleEnd); | 
| 2455 | 0 |   StringRef TagPrefix = T.substr(HandleEnd).ltrim(" \t"); | 
| 2456 | 0 |   TagMap[TagHandle] = TagPrefix; | 
| 2457 | 0 | } | 
| 2458 |  |  | 
| 2459 | 0 | bool Document::expectToken(int TK) { | 
| 2460 | 0 |   Token T = getNext(); | 
| 2461 | 0 |   if (T.Kind != TK) { | 
| 2462 | 0 |     setError("Unexpected token", T); | 
| 2463 | 0 |     return false; | 
| 2464 | 0 |   } | 
| 2465 | 0 |   return true; | 
| 2466 | 0 | } |