Strings / Sequence Dynamic Programming
3.6.1 Longest Common Substring
3-Strings/3.6.1_Longest_Common_Substring.cpp
Given two strings, determine their longest common substring: a contiguous run of characters that appears in both. A dynamic program computes, for each pair of prefixes, the length of their longest common suffix; the largest value over all pairs locates a longest common substring. Only the previous row of the table is retained, so the auxiliary space is linear in the shorter string.
longest_common_substring(s1, s2)returns a longest common substring ofs1ands2(one such substring if several are tied in length), or the empty string if they have none in common.
Implementation
#include <string>
#include <vector>
using std::string;
string longest_common_substring(const string &s1, const string &s2) {
int n = static_cast<int>(s1.size()), m = static_cast<int>(s2.size());
if (n == 0 || m == 0) {
return "";
}
if (n < m) {
return longest_common_substring(s2, s1);
}
std::vector<int> curr(m), prev(m);
int pos = 0, len = 0;
for (int i = 0; i < n; i++) {
for (int j = 0; j < m; j++) {
if (s1[i] == s2[j]) {
curr[j] = (i > 0 && j > 0) ? prev[j - 1] + 1 : 1;
if (len < curr[j]) {
len = curr[j];
pos = i - curr[j] + 1;
}
} else {
curr[j] = 0;
}
}
curr.swap(prev);
}
return s1.substr(pos, len);
}
Example Usage
#include <cassert>
int main() {
assert(longest_common_substring("bbbabca", "aababcd") == "babc");
return 0;
}
/*
Given two strings, determine their longest common substring: a contiguous run of characters that
appears in both. A dynamic program computes, for each pair of prefixes, the length of their longest
common suffix; the largest value over all pairs locates a longest common substring. Only the
previous row of the table is retained, so the auxiliary space is linear in the shorter string.
- `longest_common_substring(s1, s2)` returns a longest common substring of `s1` and `s2` (one such
substring if several are tied in length), or the empty string if they have none in common.
Time Complexity:
- O(n*m) per call to `longest_common_substring(s1, s2)`, where $n$ and $m$ are the lengths of `s1`
and `s2`, respectively.
Space Complexity:
- O(min(n, m)) auxiliary heap space, where $n$ and $m$ are the lengths of `s1` and `s2`,
respectively.
*/
#include <string>
#include <vector>
using std::string;
string longest_common_substring(const string &s1, const string &s2) {
int n = static_cast<int>(s1.size()), m = static_cast<int>(s2.size());
if (n == 0 || m == 0) {
return "";
}
if (n < m) {
return longest_common_substring(s2, s1);
}
std::vector<int> curr(m), prev(m);
int pos = 0, len = 0;
for (int i = 0; i < n; i++) {
for (int j = 0; j < m; j++) {
if (s1[i] == s2[j]) {
curr[j] = (i > 0 && j > 0) ? prev[j - 1] + 1 : 1;
if (len < curr[j]) {
len = curr[j];
pos = i - curr[j] + 1;
}
} else {
curr[j] = 0;
}
}
curr.swap(prev);
}
return s1.substr(pos, len);
}
/*** Example Usage ***/
#include <cassert>
int main() {
assert(longest_common_substring("bbbabca", "aababcd") == "babc");
return 0;
}