Coverage Report

Created: 2020-06-26 05:44

/home/arjun/llvm-project/llvm/include/llvm/Support/YAMLParser.h
Line
Count
Source (jump to first uncovered line)
1
//===- YAMLParser.h - Simple YAML parser ------------------------*- C++ -*-===//
2
//
3
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4
// See https://llvm.org/LICENSE.txt for license information.
5
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6
//
7
//===----------------------------------------------------------------------===//
8
//
9
//  This is a YAML 1.2 parser.
10
//
11
//  See http://www.yaml.org/spec/1.2/spec.html for the full standard.
12
//
13
//  This currently does not implement the following:
14
//    * Multi-line literal folding.
15
//    * Tag resolution.
16
//    * UTF-16.
17
//    * BOMs anywhere other than the first Unicode scalar value in the file.
18
//
19
//  The most important class here is Stream. This represents a YAML stream with
20
//  0, 1, or many documents.
21
//
22
//  SourceMgr sm;
23
//  StringRef input = getInput();
24
//  yaml::Stream stream(input, sm);
25
//
26
//  for (yaml::document_iterator di = stream.begin(), de = stream.end();
27
//       di != de; ++di) {
28
//    yaml::Node *n = di->getRoot();
29
//    if (n) {
30
//      // Do something with n...
31
//    } else
32
//      break;
33
//  }
34
//
35
//===----------------------------------------------------------------------===//
36
37
#ifndef LLVM_SUPPORT_YAMLPARSER_H
38
#define LLVM_SUPPORT_YAMLPARSER_H
39
40
#include "llvm/ADT/StringRef.h"
41
#include "llvm/Support/Allocator.h"
42
#include "llvm/Support/SMLoc.h"
43
#include <cassert>
44
#include <cstddef>
45
#include <iterator>
46
#include <map>
47
#include <memory>
48
#include <string>
49
#include <system_error>
50
51
namespace llvm {
52
53
class MemoryBufferRef;
54
class SourceMgr;
55
class raw_ostream;
56
class Twine;
57
58
namespace yaml {
59
60
class Document;
61
class document_iterator;
62
class Node;
63
class Scanner;
64
struct Token;
65
66
/// Dump all the tokens in this stream to OS.
67
/// \returns true if there was an error, false otherwise.
68
bool dumpTokens(StringRef Input, raw_ostream &);
69
70
/// Scans all tokens in input without outputting anything. This is used
71
///        for benchmarking the tokenizer.
72
/// \returns true if there was an error, false otherwise.
73
bool scanTokens(StringRef Input);
74
75
/// Escape \a Input for a double quoted scalar; if \p EscapePrintable
76
/// is true, all UTF8 sequences will be escaped, if \p EscapePrintable is
77
/// false, those UTF8 sequences encoding printable unicode scalars will not be
78
/// escaped, but emitted verbatim.
79
std::string escape(StringRef Input, bool EscapePrintable = true);
80
81
/// This class represents a YAML stream potentially containing multiple
82
///        documents.
83
class Stream {
84
public:
85
  /// This keeps a reference to the string referenced by \p Input.
86
  Stream(StringRef Input, SourceMgr &, bool ShowColors = true,
87
         std::error_code *EC = nullptr);
88
89
  Stream(MemoryBufferRef InputBuffer, SourceMgr &, bool ShowColors = true,
90
         std::error_code *EC = nullptr);
91
  ~Stream();
92
93
  document_iterator begin();
94
  document_iterator end();
95
  void skip();
96
  bool failed();
97
98
0
  bool validate() {
99
0
    skip();
100
0
    return !failed();
101
0
  }
102
103
  void printError(Node *N, const Twine &Msg);
104
105
private:
106
  friend class Document;
107
108
  std::unique_ptr<Scanner> scanner;
109
  std::unique_ptr<Document> CurrentDoc;
110
};
111
112
/// Abstract base class for all Nodes.
113
class Node {
114
  virtual void anchor();
115
116
public:
117
  enum NodeKind {
118
    NK_Null,
119
    NK_Scalar,
120
    NK_BlockScalar,
121
    NK_KeyValue,
122
    NK_Mapping,
123
    NK_Sequence,
124
    NK_Alias
125
  };
126
127
  Node(unsigned int Type, std::unique_ptr<Document> &, StringRef Anchor,
128
       StringRef Tag);
129
130
  // It's not safe to copy YAML nodes; the document is streamed and the position
131
  // is part of the state.
132
  Node(const Node &) = delete;
133
  void operator=(const Node &) = delete;
134
135
  void *operator new(size_t Size, BumpPtrAllocator &Alloc,
136
0
                     size_t Alignment = 16) noexcept {
137
0
    return Alloc.Allocate(Size, Alignment);
138
0
  }
139
140
  void operator delete(void *Ptr, BumpPtrAllocator &Alloc,
141
0
                       size_t Size) noexcept {
142
0
    Alloc.Deallocate(Ptr, Size, 0);
143
0
  }
144
145
  void operator delete(void *) noexcept = delete;
146
147
  /// Get the value of the anchor attached to this node. If it does not
148
  ///        have one, getAnchor().size() will be 0.
149
0
  StringRef getAnchor() const { return Anchor; }
150
151
  /// Get the tag as it was written in the document. This does not
152
  ///   perform tag resolution.
153
0
  StringRef getRawTag() const { return Tag; }
154
155
  /// Get the verbatium tag for a given Node. This performs tag resoluton
156
  ///   and substitution.
157
  std::string getVerbatimTag() const;
158
159
0
  SMRange getSourceRange() const { return SourceRange; }
160
0
  void setSourceRange(SMRange SR) { SourceRange = SR; }
161
162
  // These functions forward to Document and Scanner.
163
  Token &peekNext();
164
  Token getNext();
165
  Node *parseBlockNode();
166
  BumpPtrAllocator &getAllocator();
167
  void setError(const Twine &Message, Token &Location) const;
168
  bool failed() const;
169
170
0
  virtual void skip() {}
171
172
0
  unsigned int getType() const { return TypeID; }
173
174
protected:
175
  std::unique_ptr<Document> &Doc;
176
  SMRange SourceRange;
177
178
  ~Node() = default;
179
180
private:
181
  unsigned int TypeID;
182
  StringRef Anchor;
183
  /// The tag as typed in the document.
184
  StringRef Tag;
185
};
186
187
/// A null value.
188
///
189
/// Example:
190
///   !!null null
191
class NullNode final : public Node {
192
  void anchor() override;
193
194
public:
195
  NullNode(std::unique_ptr<Document> &D)
196
0
      : Node(NK_Null, D, StringRef(), StringRef()) {}
197
198
0
  static bool classof(const Node *N) { return N->getType() == NK_Null; }
199
};
200
201
/// A scalar node is an opaque datum that can be presented as a
202
///        series of zero or more Unicode scalar values.
203
///
204
/// Example:
205
///   Adena
206
class ScalarNode final : public Node {
207
  void anchor() override;
208
209
public:
210
  ScalarNode(std::unique_ptr<Document> &D, StringRef Anchor, StringRef Tag,
211
             StringRef Val)
212
0
      : Node(NK_Scalar, D, Anchor, Tag), Value(Val) {
213
0
    SMLoc Start = SMLoc::getFromPointer(Val.begin());
214
0
    SMLoc End = SMLoc::getFromPointer(Val.end());
215
0
    SourceRange = SMRange(Start, End);
216
0
  }
217
218
  // Return Value without any escaping or folding or other fun YAML stuff. This
219
  // is the exact bytes that are contained in the file (after conversion to
220
  // utf8).
221
0
  StringRef getRawValue() const { return Value; }
222
223
  /// Gets the value of this node as a StringRef.
224
  ///
225
  /// \param Storage is used to store the content of the returned StringRef iff
226
  ///        it requires any modification from how it appeared in the source.
227
  ///        This happens with escaped characters and multi-line literals.
228
  StringRef getValue(SmallVectorImpl<char> &Storage) const;
229
230
0
  static bool classof(const Node *N) {
231
0
    return N->getType() == NK_Scalar;
232
0
  }
233
234
private:
235
  StringRef Value;
236
237
  StringRef unescapeDoubleQuoted(StringRef UnquotedValue,
238
                                 StringRef::size_type Start,
239
                                 SmallVectorImpl<char> &Storage) const;
240
};
241
242
/// A block scalar node is an opaque datum that can be presented as a
243
///        series of zero or more Unicode scalar values.
244
///
245
/// Example:
246
///   |
247
///     Hello
248
///     World
249
class BlockScalarNode final : public Node {
250
  void anchor() override;
251
252
public:
253
  BlockScalarNode(std::unique_ptr<Document> &D, StringRef Anchor, StringRef Tag,
254
                  StringRef Value, StringRef RawVal)
255
0
      : Node(NK_BlockScalar, D, Anchor, Tag), Value(Value) {
256
0
    SMLoc Start = SMLoc::getFromPointer(RawVal.begin());
257
0
    SMLoc End = SMLoc::getFromPointer(RawVal.end());
258
0
    SourceRange = SMRange(Start, End);
259
0
  }
260
261
  /// Gets the value of this node as a StringRef.
262
0
  StringRef getValue() const { return Value; }
263
264
0
  static bool classof(const Node *N) {
265
0
    return N->getType() == NK_BlockScalar;
266
0
  }
267
268
private:
269
  StringRef Value;
270
};
271
272
/// A key and value pair. While not technically a Node under the YAML
273
///        representation graph, it is easier to treat them this way.
274
///
275
/// TODO: Consider making this not a child of Node.
276
///
277
/// Example:
278
///   Section: .text
279
class KeyValueNode final : public Node {
280
  void anchor() override;
281
282
public:
283
  KeyValueNode(std::unique_ptr<Document> &D)
284
0
      : Node(NK_KeyValue, D, StringRef(), StringRef()) {}
285
286
  /// Parse and return the key.
287
  ///
288
  /// This may be called multiple times.
289
  ///
290
  /// \returns The key, or nullptr if failed() == true.
291
  Node *getKey();
292
293
  /// Parse and return the value.
294
  ///
295
  /// This may be called multiple times.
296
  ///
297
  /// \returns The value, or nullptr if failed() == true.
298
  Node *getValue();
299
300
0
  void skip() override {
301
0
    if (Node *Key = getKey()) {
302
0
      Key->skip();
303
0
      if (Node *Val = getValue())
304
0
        Val->skip();
305
0
    }
306
0
  }
307
308
0
  static bool classof(const Node *N) {
309
0
    return N->getType() == NK_KeyValue;
310
0
  }
311
312
private:
313
  Node *Key = nullptr;
314
  Node *Value = nullptr;
315
};
316
317
/// This is an iterator abstraction over YAML collections shared by both
318
///        sequences and maps.
319
///
320
/// BaseT must have a ValueT* member named CurrentEntry and a member function
321
/// increment() which must set CurrentEntry to 0 to create an end iterator.
322
template <class BaseT, class ValueT>
323
class basic_collection_iterator
324
    : public std::iterator<std::input_iterator_tag, ValueT> {
325
public:
326
0
  basic_collection_iterator() = default;
Unexecuted instantiation: _ZN4llvm4yaml25basic_collection_iteratorINS0_11MappingNodeENS0_12KeyValueNodeEEC2Ev
Unexecuted instantiation: _ZN4llvm4yaml25basic_collection_iteratorINS0_12SequenceNodeENS0_4NodeEEC2Ev
327
0
  basic_collection_iterator(BaseT *B) : Base(B) {}
Unexecuted instantiation: _ZN4llvm4yaml25basic_collection_iteratorINS0_11MappingNodeENS0_12KeyValueNodeEEC2EPS2_
Unexecuted instantiation: _ZN4llvm4yaml25basic_collection_iteratorINS0_12SequenceNodeENS0_4NodeEEC2EPS2_
328
329
0
  ValueT *operator->() const {
330
0
    assert(Base && Base->CurrentEntry && "Attempted to access end iterator!");
331
0
    return Base->CurrentEntry;
332
0
  }
Unexecuted instantiation: _ZNK4llvm4yaml25basic_collection_iteratorINS0_11MappingNodeENS0_12KeyValueNodeEEptEv
Unexecuted instantiation: _ZNK4llvm4yaml25basic_collection_iteratorINS0_12SequenceNodeENS0_4NodeEEptEv
333
334
0
  ValueT &operator*() const {
335
0
    assert(Base && Base->CurrentEntry &&
336
0
           "Attempted to dereference end iterator!");
337
0
    return *Base->CurrentEntry;
338
0
  }
Unexecuted instantiation: _ZNK4llvm4yaml25basic_collection_iteratorINS0_11MappingNodeENS0_12KeyValueNodeEEdeEv
Unexecuted instantiation: _ZNK4llvm4yaml25basic_collection_iteratorINS0_12SequenceNodeENS0_4NodeEEdeEv
339
340
  operator ValueT *() const {
341
    assert(Base && Base->CurrentEntry && "Attempted to access end iterator!");
342
    return Base->CurrentEntry;
343
  }
344
345
  /// Note on EqualityComparable:
346
  ///
347
  /// The iterator is not re-entrant,
348
  /// it is meant to be used for parsing YAML on-demand
349
  /// Once iteration started - it can point only to one entry at a time
350
  /// hence Base.CurrentEntry and Other.Base.CurrentEntry are equal
351
  /// iff Base and Other.Base are equal.
352
  bool operator==(const basic_collection_iterator &Other) const {
353
    if (Base && (Base == Other.Base)) {
354
      assert((Base->CurrentEntry == Other.Base->CurrentEntry)
355
             && "Equal Bases expected to point to equal Entries");
356
    }
357
358
    return Base == Other.Base;
359
  }
360
361
0
  bool operator!=(const basic_collection_iterator &Other) const {
362
0
    return !(Base == Other.Base);
363
0
  }
Unexecuted instantiation: _ZNK4llvm4yaml25basic_collection_iteratorINS0_11MappingNodeENS0_12KeyValueNodeEEneERKS4_
Unexecuted instantiation: _ZNK4llvm4yaml25basic_collection_iteratorINS0_12SequenceNodeENS0_4NodeEEneERKS4_
364
365
0
  basic_collection_iterator &operator++() {
366
0
    assert(Base && "Attempted to advance iterator past end!");
367
0
    Base->increment();
368
0
    // Create an end iterator.
369
0
    if (!Base->CurrentEntry)
370
0
      Base = nullptr;
371
0
    return *this;
372
0
  }
Unexecuted instantiation: _ZN4llvm4yaml25basic_collection_iteratorINS0_12SequenceNodeENS0_4NodeEEppEv
Unexecuted instantiation: _ZN4llvm4yaml25basic_collection_iteratorINS0_11MappingNodeENS0_12KeyValueNodeEEppEv
373
374
private:
375
  BaseT *Base = nullptr;
376
};
377
378
// The following two templates are used for both MappingNode and Sequence Node.
379
template <class CollectionType>
380
0
typename CollectionType::iterator begin(CollectionType &C) {
381
0
  assert(C.IsAtBeginning && "You may only iterate over a collection once!");
382
0
  C.IsAtBeginning = false;
383
0
  typename CollectionType::iterator ret(&C);
384
0
  ++ret;
385
0
  return ret;
386
0
}
Unexecuted instantiation: _ZN4llvm4yaml5beginINS0_11MappingNodeEEENT_8iteratorERS3_
Unexecuted instantiation: _ZN4llvm4yaml5beginINS0_12SequenceNodeEEENT_8iteratorERS3_
387
388
0
template <class CollectionType> void skip(CollectionType &C) {
389
0
  // TODO: support skipping from the middle of a parsed collection ;/
390
0
  assert((C.IsAtBeginning || C.IsAtEnd) && "Cannot skip mid parse!");
391
0
  if (C.IsAtBeginning)
392
0
    for (typename CollectionType::iterator i = begin(C), e = C.end(); i != e;
393
0
         ++i)
394
0
      i->skip();
395
0
}
Unexecuted instantiation: _ZN4llvm4yaml4skipINS0_11MappingNodeEEEvRT_
Unexecuted instantiation: _ZN4llvm4yaml4skipINS0_12SequenceNodeEEEvRT_
396
397
/// Represents a YAML map created from either a block map for a flow map.
398
///
399
/// This parses the YAML stream as increment() is called.
400
///
401
/// Example:
402
///   Name: _main
403
///   Scope: Global
404
class MappingNode final : public Node {
405
  void anchor() override;
406
407
public:
408
  enum MappingType {
409
    MT_Block,
410
    MT_Flow,
411
    MT_Inline ///< An inline mapping node is used for "[key: value]".
412
  };
413
414
  MappingNode(std::unique_ptr<Document> &D, StringRef Anchor, StringRef Tag,
415
              MappingType MT)
416
0
      : Node(NK_Mapping, D, Anchor, Tag), Type(MT) {}
417
418
  friend class basic_collection_iterator<MappingNode, KeyValueNode>;
419
420
  using iterator = basic_collection_iterator<MappingNode, KeyValueNode>;
421
422
  template <class T> friend typename T::iterator yaml::begin(T &);
423
  template <class T> friend void yaml::skip(T &);
424
425
0
  iterator begin() { return yaml::begin(*this); }
426
427
0
  iterator end() { return iterator(); }
428
429
0
  void skip() override { yaml::skip(*this); }
430
431
0
  static bool classof(const Node *N) {
432
0
    return N->getType() == NK_Mapping;
433
0
  }
434
435
private:
436
  MappingType Type;
437
  bool IsAtBeginning = true;
438
  bool IsAtEnd = false;
439
  KeyValueNode *CurrentEntry = nullptr;
440
441
  void increment();
442
};
443
444
/// Represents a YAML sequence created from either a block sequence for a
445
///        flow sequence.
446
///
447
/// This parses the YAML stream as increment() is called.
448
///
449
/// Example:
450
///   - Hello
451
///   - World
452
class SequenceNode final : public Node {
453
  void anchor() override;
454
455
public:
456
  enum SequenceType {
457
    ST_Block,
458
    ST_Flow,
459
    // Use for:
460
    //
461
    // key:
462
    // - val1
463
    // - val2
464
    //
465
    // As a BlockMappingEntry and BlockEnd are not created in this case.
466
    ST_Indentless
467
  };
468
469
  SequenceNode(std::unique_ptr<Document> &D, StringRef Anchor, StringRef Tag,
470
               SequenceType ST)
471
0
      : Node(NK_Sequence, D, Anchor, Tag), SeqType(ST) {}
472
473
  friend class basic_collection_iterator<SequenceNode, Node>;
474
475
  using iterator = basic_collection_iterator<SequenceNode, Node>;
476
477
  template <class T> friend typename T::iterator yaml::begin(T &);
478
  template <class T> friend void yaml::skip(T &);
479
480
  void increment();
481
482
0
  iterator begin() { return yaml::begin(*this); }
483
484
0
  iterator end() { return iterator(); }
485
486
0
  void skip() override { yaml::skip(*this); }
487
488
0
  static bool classof(const Node *N) {
489
0
    return N->getType() == NK_Sequence;
490
0
  }
491
492
private:
493
  SequenceType SeqType;
494
  bool IsAtBeginning = true;
495
  bool IsAtEnd = false;
496
  bool WasPreviousTokenFlowEntry = true; // Start with an imaginary ','.
497
  Node *CurrentEntry = nullptr;
498
};
499
500
/// Represents an alias to a Node with an anchor.
501
///
502
/// Example:
503
///   *AnchorName
504
class AliasNode final : public Node {
505
  void anchor() override;
506
507
public:
508
  AliasNode(std::unique_ptr<Document> &D, StringRef Val)
509
0
      : Node(NK_Alias, D, StringRef(), StringRef()), Name(Val) {}
510
511
0
  StringRef getName() const { return Name; }
512
  Node *getTarget();
513
514
0
  static bool classof(const Node *N) { return N->getType() == NK_Alias; }
515
516
private:
517
  StringRef Name;
518
};
519
520
/// A YAML Stream is a sequence of Documents. A document contains a root
521
///        node.
522
class Document {
523
public:
524
  Document(Stream &ParentStream);
525
526
  /// Root for parsing a node. Returns a single node.
527
  Node *parseBlockNode();
528
529
  /// Finish parsing the current document and return true if there are
530
  ///        more. Return false otherwise.
531
  bool skip();
532
533
  /// Parse and return the root level node.
534
0
  Node *getRoot() {
535
0
    if (Root)
536
0
      return Root;
537
0
    return Root = parseBlockNode();
538
0
  }
539
540
0
  const std::map<StringRef, StringRef> &getTagMap() const { return TagMap; }
541
542
private:
543
  friend class Node;
544
  friend class document_iterator;
545
546
  /// Stream to read tokens from.
547
  Stream &stream;
548
549
  /// Used to allocate nodes to. All are destroyed without calling their
550
  ///        destructor when the document is destroyed.
551
  BumpPtrAllocator NodeAllocator;
552
553
  /// The root node. Used to support skipping a partially parsed
554
  ///        document.
555
  Node *Root;
556
557
  /// Maps tag prefixes to their expansion.
558
  std::map<StringRef, StringRef> TagMap;
559
560
  Token &peekNext();
561
  Token getNext();
562
  void setError(const Twine &Message, Token &Location) const;
563
  bool failed() const;
564
565
  /// Parse %BLAH directives and return true if any were encountered.
566
  bool parseDirectives();
567
568
  /// Parse %YAML
569
  void parseYAMLDirective();
570
571
  /// Parse %TAG
572
  void parseTAGDirective();
573
574
  /// Consume the next token and error if it is not \a TK.
575
  bool expectToken(int TK);
576
};
577
578
/// Iterator abstraction for Documents over a Stream.
579
class document_iterator {
580
public:
581
0
  document_iterator() = default;
582
0
  document_iterator(std::unique_ptr<Document> &D) : Doc(&D) {}
583
584
0
  bool operator==(const document_iterator &Other) const {
585
0
    if (isAtEnd() || Other.isAtEnd())
586
0
      return isAtEnd() && Other.isAtEnd();
587
0
588
0
    return Doc == Other.Doc;
589
0
  }
590
0
  bool operator!=(const document_iterator &Other) const {
591
0
    return !(*this == Other);
592
0
  }
593
594
0
  document_iterator operator++() {
595
0
    assert(Doc && "incrementing iterator past the end.");
596
0
    if (!(*Doc)->skip()) {
597
0
      Doc->reset(nullptr);
598
0
    } else {
599
0
      Stream &S = (*Doc)->stream;
600
0
      Doc->reset(new Document(S));
601
0
    }
602
0
    return *this;
603
0
  }
604
605
0
  Document &operator*() { return *Doc->get(); }
606
607
0
  std::unique_ptr<Document> &operator->() { return *Doc; }
608
609
private:
610
0
  bool isAtEnd() const { return !Doc || !*Doc; }
611
612
  std::unique_ptr<Document> *Doc = nullptr;
613
};
614
615
} // end namespace yaml
616
617
} // end namespace llvm
618
619
#endif // LLVM_SUPPORT_YAMLPARSER_H