Alex's Anthology of Algorithms Common Code for Contests in Concise C++
Strings / Sequence Dynamic Programming

3.6.2 Longest Common Subsequence

3-Strings/3.6.2_Longest_Common_Subsequence.cpp

View on GitHub

Given two strings, determine their longest common subsequence. A subsequence is a string that can be derived from the original string by deleting some elements without changing the order of the remaining elements (e.g. "ACE" is a subsequence of "ABCDE", but "BAE" is not).

  • longest_common_subsequence(s1, s2) returns the longest common subsequence of strings s1 and s2 using a classic dynamic programming approach. This implementation computes dp[i][j] (the length of the longest common subsequence for the length $i$ prefix of s1 and the length $j$ prefix of s2) before following the path backwards to construct the answer.
  • hirschberg_lcs(s1, s2) returns the longest common subsequence of strings s1 and s2 using the more memory efficient Hirschberg's algorithm.

Implementation

#include <algorithm>
#include <iterator>
#include <string>
#include <vector>
using std::string;

string longest_common_subsequence(const string &s1, const string &s2) {
  int n = static_cast<int>(s1.size()), m = static_cast<int>(s2.size());
  std::vector<std::vector<int>> dp(n + 1, std::vector<int>(m + 1, 0));
  for (int i = 1; i <= n; i++) {
    for (int j = 1; j <= m; j++) {
      if (s1[i - 1] == s2[j - 1]) {
        dp[i][j] = dp[i - 1][j - 1] + 1;
      } else {
        dp[i][j] = std::max(dp[i][j - 1], dp[i - 1][j]);
      }
    }
  }
  string res;
  for (int i = n, j = m; i > 0 && j > 0;) {
    if (s1[i - 1] == s2[j - 1]) {
      res += s1[i - 1];
      i--;
      j--;
    } else if (dp[i - 1][j] >= dp[i][j - 1]) {
      i--;
    } else {
      j--;
    }
  }
  std::reverse(res.begin(), res.end());
  return res;
}

template<class It>
std::vector<int> lcs_len(It lo1, It hi1, It lo2, It hi2) {
  std::vector<int> res(std::distance(lo2, hi2) + 1), prev(res);
  for (It it1 = lo1; it1 != hi1; ++it1) {
    res.swap(prev);
    int i = 0;
    for (It it2 = lo2; it2 != hi2; ++it2) {
      res[i + 1] = (*it1 == *it2) ? prev[i] + 1 : std::max(res[i], prev[i + 1]);
      i++;
    }
  }
  return res;
}

template<class It>
void hirschberg_rec(It lo1, It hi1, It lo2, It hi2, string *res) {
  if (lo1 == hi1) {
    return;
  }
  if (lo1 + 1 == hi1) {
    if (std::find(lo2, hi2, *lo1) != hi2) {
      *res += *lo1;
    }
    return;
  }
  It mid1 = lo1 + (hi1 - lo1) / 2;
  std::reverse_iterator<It> rlo1(hi1), rmid1(mid1), rlo2(hi2), rhi2(lo2);
  std::vector<int> fwd = lcs_len(lo1, mid1, lo2, hi2);
  std::vector<int> rev = lcs_len(rlo1, rmid1, rlo2, rhi2);
  It mid2 = lo2;
  int maxlen = -1;
  for (int i = 0, j = static_cast<int>(rev.size()) - 1; i < static_cast<int>(fwd.size());
       i++, j--) {
    if (fwd[i] + rev[j] > maxlen) {
      maxlen = fwd[i] + rev[j];
      mid2 = lo2 + i;
    }
  }
  hirschberg_rec(lo1, mid1, lo2, mid2, res);
  hirschberg_rec(mid1, hi1, mid2, hi2, res);
}

string hirschberg_lcs(const string &s1, const string &s2) {
  if (s1.size() < s2.size()) {
    return hirschberg_lcs(s2, s1);
  }
  string res;
  hirschberg_rec(s1.begin(), s1.end(), s2.begin(), s2.end(), &res);
  return res;
}

Example Usage

#include <cassert>

int main() {
  assert(longest_common_subsequence("xmjyauz", "mzjawxu") == "mjau");
  assert(hirschberg_lcs("xmjyauz", "mzjawxu") == "mjau");
  return 0;
}