/home/arjun/llvm-project/llvm/include/llvm/ADT/edit_distance.h
Line | Count | Source (jump to first uncovered line) |
1 | | //===-- llvm/ADT/edit_distance.h - Array edit distance function --- C++ -*-===// |
2 | | // |
3 | | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | | // See https://llvm.org/LICENSE.txt for license information. |
5 | | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | | // |
7 | | //===----------------------------------------------------------------------===// |
8 | | // |
9 | | // This file defines a Levenshtein distance function that works for any two |
10 | | // sequences, with each element of each sequence being analogous to a character |
11 | | // in a string. |
12 | | // |
13 | | //===----------------------------------------------------------------------===// |
14 | | |
15 | | #ifndef LLVM_ADT_EDIT_DISTANCE_H |
16 | | #define LLVM_ADT_EDIT_DISTANCE_H |
17 | | |
18 | | #include "llvm/ADT/ArrayRef.h" |
19 | | #include <algorithm> |
20 | | #include <memory> |
21 | | |
22 | | namespace llvm { |
23 | | |
24 | | /// Determine the edit distance between two sequences. |
25 | | /// |
26 | | /// \param FromArray the first sequence to compare. |
27 | | /// |
28 | | /// \param ToArray the second sequence to compare. |
29 | | /// |
30 | | /// \param AllowReplacements whether to allow element replacements (change one |
31 | | /// element into another) as a single operation, rather than as two operations |
32 | | /// (an insertion and a removal). |
33 | | /// |
34 | | /// \param MaxEditDistance If non-zero, the maximum edit distance that this |
35 | | /// routine is allowed to compute. If the edit distance will exceed that |
36 | | /// maximum, returns \c MaxEditDistance+1. |
37 | | /// |
38 | | /// \returns the minimum number of element insertions, removals, or (if |
39 | | /// \p AllowReplacements is \c true) replacements needed to transform one of |
40 | | /// the given sequences into the other. If zero, the sequences are identical. |
41 | | template<typename T> |
42 | | unsigned ComputeEditDistance(ArrayRef<T> FromArray, ArrayRef<T> ToArray, |
43 | | bool AllowReplacements = true, |
44 | 0 | unsigned MaxEditDistance = 0) { |
45 | 0 | // The algorithm implemented below is the "classic" |
46 | 0 | // dynamic-programming algorithm for computing the Levenshtein |
47 | 0 | // distance, which is described here: |
48 | 0 | // |
49 | 0 | // http://en.wikipedia.org/wiki/Levenshtein_distance |
50 | 0 | // |
51 | 0 | // Although the algorithm is typically described using an m x n |
52 | 0 | // array, only one row plus one element are used at a time, so this |
53 | 0 | // implementation just keeps one vector for the row. To update one entry, |
54 | 0 | // only the entries to the left, top, and top-left are needed. The left |
55 | 0 | // entry is in Row[x-1], the top entry is what's in Row[x] from the last |
56 | 0 | // iteration, and the top-left entry is stored in Previous. |
57 | 0 | typename ArrayRef<T>::size_type m = FromArray.size(); |
58 | 0 | typename ArrayRef<T>::size_type n = ToArray.size(); |
59 | 0 |
|
60 | 0 | const unsigned SmallBufferSize = 64; |
61 | 0 | unsigned SmallBuffer[SmallBufferSize]; |
62 | 0 | std::unique_ptr<unsigned[]> Allocated; |
63 | 0 | unsigned *Row = SmallBuffer; |
64 | 0 | if (n + 1 > SmallBufferSize) { |
65 | 0 | Row = new unsigned[n + 1]; |
66 | 0 | Allocated.reset(Row); |
67 | 0 | } |
68 | 0 |
|
69 | 0 | for (unsigned i = 1; i <= n; ++i) |
70 | 0 | Row[i] = i; |
71 | 0 |
|
72 | 0 | for (typename ArrayRef<T>::size_type y = 1; y <= m; ++y) { |
73 | 0 | Row[0] = y; |
74 | 0 | unsigned BestThisRow = Row[0]; |
75 | 0 |
|
76 | 0 | unsigned Previous = y - 1; |
77 | 0 | for (typename ArrayRef<T>::size_type x = 1; x <= n; ++x) { |
78 | 0 | int OldRow = Row[x]; |
79 | 0 | if (AllowReplacements) { |
80 | 0 | Row[x] = std::min( |
81 | 0 | Previous + (FromArray[y-1] == ToArray[x-1] ? 0u : 1u), |
82 | 0 | std::min(Row[x-1], Row[x])+1); |
83 | 0 | } |
84 | 0 | else { |
85 | 0 | if (FromArray[y-1] == ToArray[x-1]) Row[x] = Previous; |
86 | 0 | else Row[x] = std::min(Row[x-1], Row[x]) + 1; |
87 | 0 | } |
88 | 0 | Previous = OldRow; |
89 | 0 | BestThisRow = std::min(BestThisRow, Row[x]); |
90 | 0 | } |
91 | 0 |
|
92 | 0 | if (MaxEditDistance && BestThisRow > MaxEditDistance) |
93 | 0 | return MaxEditDistance + 1; |
94 | 0 | } |
95 | 0 |
|
96 | 0 | unsigned Result = Row[n]; |
97 | 0 | return Result; |
98 | 0 | } |
99 | | |
100 | | } // End llvm namespace |
101 | | |
102 | | #endif |