Coverage Report

Created: 2020-06-26 05:44

/home/arjun/llvm-project/llvm/lib/Support/StringRef.cpp
Line
Count
Source (jump to first uncovered line)
1
//===-- StringRef.cpp - Lightweight String References ---------------------===//
2
//
3
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4
// See https://llvm.org/LICENSE.txt for license information.
5
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6
//
7
//===----------------------------------------------------------------------===//
8
9
#include "llvm/ADT/StringRef.h"
10
#include "llvm/ADT/APFloat.h"
11
#include "llvm/ADT/APInt.h"
12
#include "llvm/ADT/Hashing.h"
13
#include "llvm/ADT/StringExtras.h"
14
#include "llvm/ADT/edit_distance.h"
15
#include "llvm/Support/Error.h"
16
#include <bitset>
17
18
using namespace llvm;
19
20
// MSVC emits references to this into the translation units which reference it.
21
#ifndef _MSC_VER
22
constexpr size_t StringRef::npos;
23
#endif
24
25
// strncasecmp() is not available on non-POSIX systems, so define an
26
// alternative function here.
27
0
static int ascii_strncasecmp(const char *LHS, const char *RHS, size_t Length) {
28
0
  for (size_t I = 0; I < Length; ++I) {
29
0
    unsigned char LHC = toLower(LHS[I]);
30
0
    unsigned char RHC = toLower(RHS[I]);
31
0
    if (LHC != RHC)
32
0
      return LHC < RHC ? -1 : 1;
33
0
  }
34
0
  return 0;
35
0
}
36
37
/// compare_lower - Compare strings, ignoring case.
38
0
int StringRef::compare_lower(StringRef RHS) const {
39
0
  if (int Res = ascii_strncasecmp(Data, RHS.Data, std::min(Length, RHS.Length)))
40
0
    return Res;
41
0
  if (Length == RHS.Length)
42
0
    return 0;
43
0
  return Length < RHS.Length ? -1 : 1;
44
0
}
45
46
/// Check if this string starts with the given \p Prefix, ignoring case.
47
0
bool StringRef::startswith_lower(StringRef Prefix) const {
48
0
  return Length >= Prefix.Length &&
49
0
      ascii_strncasecmp(Data, Prefix.Data, Prefix.Length) == 0;
50
0
}
51
52
/// Check if this string ends with the given \p Suffix, ignoring case.
53
0
bool StringRef::endswith_lower(StringRef Suffix) const {
54
0
  return Length >= Suffix.Length &&
55
0
      ascii_strncasecmp(end() - Suffix.Length, Suffix.Data, Suffix.Length) == 0;
56
0
}
57
58
0
size_t StringRef::find_lower(char C, size_t From) const {
59
0
  char L = toLower(C);
60
0
  return find_if([L](char D) { return toLower(D) == L; }, From);
61
0
}
62
63
/// compare_numeric - Compare strings, handle embedded numbers.
64
0
int StringRef::compare_numeric(StringRef RHS) const {
65
0
  for (size_t I = 0, E = std::min(Length, RHS.Length); I != E; ++I) {
66
0
    // Check for sequences of digits.
67
0
    if (isDigit(Data[I]) && isDigit(RHS.Data[I])) {
68
0
      // The longer sequence of numbers is considered larger.
69
0
      // This doesn't really handle prefixed zeros well.
70
0
      size_t J;
71
0
      for (J = I + 1; J != E + 1; ++J) {
72
0
        bool ld = J < Length && isDigit(Data[J]);
73
0
        bool rd = J < RHS.Length && isDigit(RHS.Data[J]);
74
0
        if (ld != rd)
75
0
          return rd ? -1 : 1;
76
0
        if (!rd)
77
0
          break;
78
0
      }
79
0
      // The two number sequences have the same length (J-I), just memcmp them.
80
0
      if (int Res = compareMemory(Data + I, RHS.Data + I, J - I))
81
0
        return Res < 0 ? -1 : 1;
82
0
      // Identical number sequences, continue search after the numbers.
83
0
      I = J - 1;
84
0
      continue;
85
0
    }
86
0
    if (Data[I] != RHS.Data[I])
87
0
      return (unsigned char)Data[I] < (unsigned char)RHS.Data[I] ? -1 : 1;
88
0
  }
89
0
  if (Length == RHS.Length)
90
0
    return 0;
91
0
  return Length < RHS.Length ? -1 : 1;
92
0
}
93
94
// Compute the edit distance between the two given strings.
95
unsigned StringRef::edit_distance(llvm::StringRef Other,
96
                                  bool AllowReplacements,
97
0
                                  unsigned MaxEditDistance) const {
98
0
  return llvm::ComputeEditDistance(
99
0
      makeArrayRef(data(), size()),
100
0
      makeArrayRef(Other.data(), Other.size()),
101
0
      AllowReplacements, MaxEditDistance);
102
0
}
103
104
//===----------------------------------------------------------------------===//
105
// String Operations
106
//===----------------------------------------------------------------------===//
107
108
0
std::string StringRef::lower() const {
109
0
  return std::string(map_iterator(begin(), toLower),
110
0
                     map_iterator(end(), toLower));
111
0
}
112
113
0
std::string StringRef::upper() const {
114
0
  return std::string(map_iterator(begin(), toUpper),
115
0
                     map_iterator(end(), toUpper));
116
0
}
117
118
//===----------------------------------------------------------------------===//
119
// String Searching
120
//===----------------------------------------------------------------------===//
121
122
123
/// find - Search for the first string \arg Str in the string.
124
///
125
/// \return - The index of the first occurrence of \arg Str, or npos if not
126
/// found.
127
0
size_t StringRef::find(StringRef Str, size_t From) const {
128
0
  if (From > Length)
129
0
    return npos;
130
0
131
0
  const char *Start = Data + From;
132
0
  size_t Size = Length - From;
133
0
134
0
  const char *Needle = Str.data();
135
0
  size_t N = Str.size();
136
0
  if (N == 0)
137
0
    return From;
138
0
  if (Size < N)
139
0
    return npos;
140
0
  if (N == 1) {
141
0
    const char *Ptr = (const char *)::memchr(Start, Needle[0], Size);
142
0
    return Ptr == nullptr ? npos : Ptr - Data;
143
0
  }
144
0
145
0
  const char *Stop = Start + (Size - N + 1);
146
0
147
0
  // For short haystacks or unsupported needles fall back to the naive algorithm
148
0
  if (Size < 16 || N > 255) {
149
0
    do {
150
0
      if (std::memcmp(Start, Needle, N) == 0)
151
0
        return Start - Data;
152
0
      ++Start;
153
0
    } while (Start < Stop);
154
0
    return npos;
155
0
  }
156
0
157
0
  // Build the bad char heuristic table, with uint8_t to reduce cache thrashing.
158
0
  uint8_t BadCharSkip[256];
159
0
  std::memset(BadCharSkip, N, 256);
160
0
  for (unsigned i = 0; i != N-1; ++i)
161
0
    BadCharSkip[(uint8_t)Str[i]] = N-1-i;
162
0
163
0
  do {
164
0
    uint8_t Last = Start[N - 1];
165
0
    if (LLVM_UNLIKELY(Last == (uint8_t)Needle[N - 1]))
166
0
      if (std::memcmp(Start, Needle, N - 1) == 0)
167
0
        return Start - Data;
168
0
169
0
    // Otherwise skip the appropriate number of bytes.
170
0
    Start += BadCharSkip[Last];
171
0
  } while (Start < Stop);
172
0
173
0
  return npos;
174
0
}
175
176
0
size_t StringRef::find_lower(StringRef Str, size_t From) const {
177
0
  StringRef This = substr(From);
178
0
  while (This.size() >= Str.size()) {
179
0
    if (This.startswith_lower(Str))
180
0
      return From;
181
0
    This = This.drop_front();
182
0
    ++From;
183
0
  }
184
0
  return npos;
185
0
}
186
187
0
size_t StringRef::rfind_lower(char C, size_t From) const {
188
0
  From = std::min(From, Length);
189
0
  size_t i = From;
190
0
  while (i != 0) {
191
0
    --i;
192
0
    if (toLower(Data[i]) == toLower(C))
193
0
      return i;
194
0
  }
195
0
  return npos;
196
0
}
197
198
/// rfind - Search for the last string \arg Str in the string.
199
///
200
/// \return - The index of the last occurrence of \arg Str, or npos if not
201
/// found.
202
0
size_t StringRef::rfind(StringRef Str) const {
203
0
  size_t N = Str.size();
204
0
  if (N > Length)
205
0
    return npos;
206
0
  for (size_t i = Length - N + 1, e = 0; i != e;) {
207
0
    --i;
208
0
    if (substr(i, N).equals(Str))
209
0
      return i;
210
0
  }
211
0
  return npos;
212
0
}
213
214
0
size_t StringRef::rfind_lower(StringRef Str) const {
215
0
  size_t N = Str.size();
216
0
  if (N > Length)
217
0
    return npos;
218
0
  for (size_t i = Length - N + 1, e = 0; i != e;) {
219
0
    --i;
220
0
    if (substr(i, N).equals_lower(Str))
221
0
      return i;
222
0
  }
223
0
  return npos;
224
0
}
225
226
/// find_first_of - Find the first character in the string that is in \arg
227
/// Chars, or npos if not found.
228
///
229
/// Note: O(size() + Chars.size())
230
StringRef::size_type StringRef::find_first_of(StringRef Chars,
231
0
                                              size_t From) const {
232
0
  std::bitset<1 << CHAR_BIT> CharBits;
233
0
  for (size_type i = 0; i != Chars.size(); ++i)
234
0
    CharBits.set((unsigned char)Chars[i]);
235
0
236
0
  for (size_type i = std::min(From, Length), e = Length; i != e; ++i)
237
0
    if (CharBits.test((unsigned char)Data[i]))
238
0
      return i;
239
0
  return npos;
240
0
}
241
242
/// find_first_not_of - Find the first character in the string that is not
243
/// \arg C or npos if not found.
244
0
StringRef::size_type StringRef::find_first_not_of(char C, size_t From) const {
245
0
  for (size_type i = std::min(From, Length), e = Length; i != e; ++i)
246
0
    if (Data[i] != C)
247
0
      return i;
248
0
  return npos;
249
0
}
250
251
/// find_first_not_of - Find the first character in the string that is not
252
/// in the string \arg Chars, or npos if not found.
253
///
254
/// Note: O(size() + Chars.size())
255
StringRef::size_type StringRef::find_first_not_of(StringRef Chars,
256
0
                                                  size_t From) const {
257
0
  std::bitset<1 << CHAR_BIT> CharBits;
258
0
  for (size_type i = 0; i != Chars.size(); ++i)
259
0
    CharBits.set((unsigned char)Chars[i]);
260
0
261
0
  for (size_type i = std::min(From, Length), e = Length; i != e; ++i)
262
0
    if (!CharBits.test((unsigned char)Data[i]))
263
0
      return i;
264
0
  return npos;
265
0
}
266
267
/// find_last_of - Find the last character in the string that is in \arg C,
268
/// or npos if not found.
269
///
270
/// Note: O(size() + Chars.size())
271
StringRef::size_type StringRef::find_last_of(StringRef Chars,
272
2
                                             size_t From) const {
273
2
  std::bitset<1 << CHAR_BIT> CharBits;
274
4
  for (size_type i = 0; i != Chars.size(); ++i)
275
2
    CharBits.set((unsigned char)Chars[i]);
276
2
277
36
  for (size_type i = std::min(From, Length) - 1, e = -1; i != e; --i)
278
36
    if (CharBits.test((unsigned char)Data[i]))
279
2
      return i;
280
2
  return npos;
281
2
}
282
283
/// find_last_not_of - Find the last character in the string that is not
284
/// \arg C, or npos if not found.
285
0
StringRef::size_type StringRef::find_last_not_of(char C, size_t From) const {
286
0
  for (size_type i = std::min(From, Length) - 1, e = -1; i != e; --i)
287
0
    if (Data[i] != C)
288
0
      return i;
289
0
  return npos;
290
0
}
291
292
/// find_last_not_of - Find the last character in the string that is not in
293
/// \arg Chars, or npos if not found.
294
///
295
/// Note: O(size() + Chars.size())
296
StringRef::size_type StringRef::find_last_not_of(StringRef Chars,
297
0
                                                 size_t From) const {
298
0
  std::bitset<1 << CHAR_BIT> CharBits;
299
0
  for (size_type i = 0, e = Chars.size(); i != e; ++i)
300
0
    CharBits.set((unsigned char)Chars[i]);
301
0
302
0
  for (size_type i = std::min(From, Length) - 1, e = -1; i != e; --i)
303
0
    if (!CharBits.test((unsigned char)Data[i]))
304
0
      return i;
305
0
  return npos;
306
0
}
307
308
void StringRef::split(SmallVectorImpl<StringRef> &A,
309
                      StringRef Separator, int MaxSplit,
310
0
                      bool KeepEmpty) const {
311
0
  StringRef S = *this;
312
0
313
0
  // Count down from MaxSplit. When MaxSplit is -1, this will just split
314
0
  // "forever". This doesn't support splitting more than 2^31 times
315
0
  // intentionally; if we ever want that we can make MaxSplit a 64-bit integer
316
0
  // but that seems unlikely to be useful.
317
0
  while (MaxSplit-- != 0) {
318
0
    size_t Idx = S.find(Separator);
319
0
    if (Idx == npos)
320
0
      break;
321
0
322
0
    // Push this split.
323
0
    if (KeepEmpty || Idx > 0)
324
0
      A.push_back(S.slice(0, Idx));
325
0
326
0
    // Jump forward.
327
0
    S = S.slice(Idx + Separator.size(), npos);
328
0
  }
329
0
330
0
  // Push the tail.
331
0
  if (KeepEmpty || !S.empty())
332
0
    A.push_back(S);
333
0
}
334
335
void StringRef::split(SmallVectorImpl<StringRef> &A, char Separator,
336
8
                      int MaxSplit, bool KeepEmpty) const {
337
8
  StringRef S = *this;
338
8
339
8
  // Count down from MaxSplit. When MaxSplit is -1, this will just split
340
8
  // "forever". This doesn't support splitting more than 2^31 times
341
8
  // intentionally; if we ever want that we can make MaxSplit a 64-bit integer
342
8
  // but that seems unlikely to be useful.
343
32
  while (MaxSplit-- != 0) {
344
26
    size_t Idx = S.find(Separator);
345
26
    if (Idx == npos)
346
2
      break;
347
24
348
24
    // Push this split.
349
24
    if (KeepEmpty || Idx > 0)
350
24
      A.push_back(S.slice(0, Idx));
351
24
352
24
    // Jump forward.
353
24
    S = S.slice(Idx + 1, npos);
354
24
  }
355
8
356
8
  // Push the tail.
357
8
  if (KeepEmpty || !S.empty())
358
8
    A.push_back(S);
359
8
}
360
361
//===----------------------------------------------------------------------===//
362
// Helpful Algorithms
363
//===----------------------------------------------------------------------===//
364
365
/// count - Return the number of non-overlapped occurrences of \arg Str in
366
/// the string.
367
0
size_t StringRef::count(StringRef Str) const {
368
0
  size_t Count = 0;
369
0
  size_t N = Str.size();
370
0
  if (!N || N > Length)
371
0
    return 0;
372
0
  for (size_t i = 0, e = Length - N + 1; i < e;) {
373
0
    if (substr(i, N).equals(Str)) {
374
0
      ++Count;
375
0
      i += N;
376
0
    }
377
0
    else
378
0
      ++i;
379
0
  }
380
0
  return Count;
381
0
}
382
383
0
static unsigned GetAutoSenseRadix(StringRef &Str) {
384
0
  if (Str.empty())
385
0
    return 10;
386
0
387
0
  if (Str.startswith("0x") || Str.startswith("0X")) {
388
0
    Str = Str.substr(2);
389
0
    return 16;
390
0
  }
391
0
392
0
  if (Str.startswith("0b") || Str.startswith("0B")) {
393
0
    Str = Str.substr(2);
394
0
    return 2;
395
0
  }
396
0
397
0
  if (Str.startswith("0o")) {
398
0
    Str = Str.substr(2);
399
0
    return 8;
400
0
  }
401
0
402
0
  if (Str[0] == '0' && Str.size() > 1 && isDigit(Str[1])) {
403
0
    Str = Str.substr(1);
404
0
    return 8;
405
0
  }
406
0
407
0
  return 10;
408
0
}
409
410
bool llvm::consumeUnsignedInteger(StringRef &Str, unsigned Radix,
411
0
                                  unsigned long long &Result) {
412
0
  // Autosense radix if not specified.
413
0
  if (Radix == 0)
414
0
    Radix = GetAutoSenseRadix(Str);
415
0
416
0
  // Empty strings (after the radix autosense) are invalid.
417
0
  if (Str.empty()) return true;
418
0
419
0
  // Parse all the bytes of the string given this radix.  Watch for overflow.
420
0
  StringRef Str2 = Str;
421
0
  Result = 0;
422
0
  while (!Str2.empty()) {
423
0
    unsigned CharVal;
424
0
    if (Str2[0] >= '0' && Str2[0] <= '9')
425
0
      CharVal = Str2[0] - '0';
426
0
    else if (Str2[0] >= 'a' && Str2[0] <= 'z')
427
0
      CharVal = Str2[0] - 'a' + 10;
428
0
    else if (Str2[0] >= 'A' && Str2[0] <= 'Z')
429
0
      CharVal = Str2[0] - 'A' + 10;
430
0
    else
431
0
      break;
432
0
433
0
    // If the parsed value is larger than the integer radix, we cannot
434
0
    // consume any more characters.
435
0
    if (CharVal >= Radix)
436
0
      break;
437
0
438
0
    // Add in this character.
439
0
    unsigned long long PrevResult = Result;
440
0
    Result = Result * Radix + CharVal;
441
0
442
0
    // Check for overflow by shifting back and seeing if bits were lost.
443
0
    if (Result / Radix < PrevResult)
444
0
      return true;
445
0
446
0
    Str2 = Str2.substr(1);
447
0
  }
448
0
449
0
  // We consider the operation a failure if no characters were consumed
450
0
  // successfully.
451
0
  if (Str.size() == Str2.size())
452
0
    return true;
453
0
454
0
  Str = Str2;
455
0
  return false;
456
0
}
457
458
bool llvm::consumeSignedInteger(StringRef &Str, unsigned Radix,
459
0
                                long long &Result) {
460
0
  unsigned long long ULLVal;
461
0
462
0
  // Handle positive strings first.
463
0
  if (Str.empty() || Str.front() != '-') {
464
0
    if (consumeUnsignedInteger(Str, Radix, ULLVal) ||
465
0
        // Check for value so large it overflows a signed value.
466
0
        (long long)ULLVal < 0)
467
0
      return true;
468
0
    Result = ULLVal;
469
0
    return false;
470
0
  }
471
0
472
0
  // Get the positive part of the value.
473
0
  StringRef Str2 = Str.drop_front(1);
474
0
  if (consumeUnsignedInteger(Str2, Radix, ULLVal) ||
475
0
      // Reject values so large they'd overflow as negative signed, but allow
476
0
      // "-0".  This negates the unsigned so that the negative isn't undefined
477
0
      // on signed overflow.
478
0
      (long long)-ULLVal > 0)
479
0
    return true;
480
0
481
0
  Str = Str2;
482
0
  Result = -ULLVal;
483
0
  return false;
484
0
}
485
486
/// GetAsUnsignedInteger - Workhorse method that converts a integer character
487
/// sequence of radix up to 36 to an unsigned long long value.
488
bool llvm::getAsUnsignedInteger(StringRef Str, unsigned Radix,
489
0
                                unsigned long long &Result) {
490
0
  if (consumeUnsignedInteger(Str, Radix, Result))
491
0
    return true;
492
0
493
0
  // For getAsUnsignedInteger, we require the whole string to be consumed or
494
0
  // else we consider it a failure.
495
0
  return !Str.empty();
496
0
}
497
498
bool llvm::getAsSignedInteger(StringRef Str, unsigned Radix,
499
0
                              long long &Result) {
500
0
  if (consumeSignedInteger(Str, Radix, Result))
501
0
    return true;
502
0
503
0
  // For getAsSignedInteger, we require the whole string to be consumed or else
504
0
  // we consider it a failure.
505
0
  return !Str.empty();
506
0
}
507
508
0
bool StringRef::getAsInteger(unsigned Radix, APInt &Result) const {
509
0
  StringRef Str = *this;
510
0
511
0
  // Autosense radix if not specified.
512
0
  if (Radix == 0)
513
0
    Radix = GetAutoSenseRadix(Str);
514
0
515
0
  assert(Radix > 1 && Radix <= 36);
516
0
517
0
  // Empty strings (after the radix autosense) are invalid.
518
0
  if (Str.empty()) return true;
519
0
520
0
  // Skip leading zeroes.  This can be a significant improvement if
521
0
  // it means we don't need > 64 bits.
522
0
  while (!Str.empty() && Str.front() == '0')
523
0
    Str = Str.substr(1);
524
0
525
0
  // If it was nothing but zeroes....
526
0
  if (Str.empty()) {
527
0
    Result = APInt(64, 0);
528
0
    return false;
529
0
  }
530
0
531
0
  // (Over-)estimate the required number of bits.
532
0
  unsigned Log2Radix = 0;
533
0
  while ((1U << Log2Radix) < Radix) Log2Radix++;
534
0
  bool IsPowerOf2Radix = ((1U << Log2Radix) == Radix);
535
0
536
0
  unsigned BitWidth = Log2Radix * Str.size();
537
0
  if (BitWidth < Result.getBitWidth())
538
0
    BitWidth = Result.getBitWidth(); // don't shrink the result
539
0
  else if (BitWidth > Result.getBitWidth())
540
0
    Result = Result.zext(BitWidth);
541
0
542
0
  APInt RadixAP, CharAP; // unused unless !IsPowerOf2Radix
543
0
  if (!IsPowerOf2Radix) {
544
0
    // These must have the same bit-width as Result.
545
0
    RadixAP = APInt(BitWidth, Radix);
546
0
    CharAP = APInt(BitWidth, 0);
547
0
  }
548
0
549
0
  // Parse all the bytes of the string given this radix.
550
0
  Result = 0;
551
0
  while (!Str.empty()) {
552
0
    unsigned CharVal;
553
0
    if (Str[0] >= '0' && Str[0] <= '9')
554
0
      CharVal = Str[0]-'0';
555
0
    else if (Str[0] >= 'a' && Str[0] <= 'z')
556
0
      CharVal = Str[0]-'a'+10;
557
0
    else if (Str[0] >= 'A' && Str[0] <= 'Z')
558
0
      CharVal = Str[0]-'A'+10;
559
0
    else
560
0
      return true;
561
0
562
0
    // If the parsed value is larger than the integer radix, the string is
563
0
    // invalid.
564
0
    if (CharVal >= Radix)
565
0
      return true;
566
0
567
0
    // Add in this character.
568
0
    if (IsPowerOf2Radix) {
569
0
      Result <<= Log2Radix;
570
0
      Result |= CharVal;
571
0
    } else {
572
0
      Result *= RadixAP;
573
0
      CharAP = CharVal;
574
0
      Result += CharAP;
575
0
    }
576
0
577
0
    Str = Str.substr(1);
578
0
  }
579
0
580
0
  return false;
581
0
}
582
583
0
bool StringRef::getAsDouble(double &Result, bool AllowInexact) const {
584
0
  APFloat F(0.0);
585
0
  auto StatusOrErr = F.convertFromString(*this, APFloat::rmNearestTiesToEven);
586
0
  if (errorToBool(StatusOrErr.takeError()))
587
0
    return true;
588
0
589
0
  APFloat::opStatus Status = *StatusOrErr;
590
0
  if (Status != APFloat::opOK) {
591
0
    if (!AllowInexact || !(Status & APFloat::opInexact))
592
0
      return true;
593
0
  }
594
0
595
0
  Result = F.convertToDouble();
596
0
  return false;
597
0
}
598
599
// Implementation of StringRef hashing.
600
0
hash_code llvm::hash_value(StringRef S) {
601
0
  return hash_combine_range(S.begin(), S.end());
602
0
}