Elementary Algorithms / Arrays and Dynamic Programming

1.2.4 Iterated Function Cycle Detection

1-Elementary-Algorithms/1.2.4_Iterated_Function_Cycle_Detection.cpp

View on GitHub

Given a function $f$ mapping a finite set to itself and a starting value $x_0$, return the position and length of the cycle reached by repeatedly applying $f$. Formally, the sequence $x_0, x_1 = f(x_0), x_2 = f(x_1), \ldots, x_n = f(x_{n - 1}), \ldots$ must eventually repeat some value. If $x_i = x_j$ for $i < j$, then the sequence from $x_i$ to $x_{j - 1}$ repeats forever.

This is useful for detecting cycles in functional graphs, pseudo-random generators, Pollard rho style algorithms, degenerate linked lists, and arrays where each index points to the next index. For example, if nums has size $n + 1$ and all values are in $[1, n]$, then $f(i)$ = nums[i] forms a functional graph; the duplicate value is the entry point of the cycle reached from 0.

Floyd's cycle-finding algorithm, a.k.a. the "tortoise and the hare algorithm", is a space-efficient default choice: it moves two pointers through the sequence at different speeds, finds a meeting point inside the cycle, then locates the cycle start. Brent's algorithm is a compact variant that usually calls $f$ fewer times by only resetting the tortoise at powers of two; prefer it when evaluating $f$ is expensive and constant factors matter. In the detection phase, Brent advances one pointer with one call to $f$ per step, while Floyd advances the tortoise once and the hare twice, using three calls to $f$ per step.

find_cycle_floyd(f, x0) returns the reached cycle as a pair (start, length), where start is the smallest index $i$ such that $x_i$ is in the cycle, and length is the number of distinct values in the cycle.
find_cycle_brent(f, x0) does the same with fewer calls to $f$ in many cases.

Time Complexity

$O(m + n)$ per call, where $m$ is the first index in the cycle and $n$ is the cycle length.

Space Complexity

$O(1)$ auxiliary.

Implementation

#include <utility>

template<class Fn, class T>
std::pair<int, int> find_cycle_floyd(Fn f, T x0) {
  T tortoise = f(x0), hare = f(f(x0));
  while (tortoise != hare) {
    tortoise = f(tortoise);
    hare = f(f(hare));
  }
  int start = 0;
  tortoise = x0;
  while (tortoise != hare) {
    tortoise = f(tortoise);
    hare = f(hare);
    start++;
  }
  int length = 1;
  hare = f(tortoise);
  while (tortoise != hare) {
    hare = f(hare);
    length++;
  }
  return {start, length};
}

template<class Fn, class T>
std::pair<int, int> find_cycle_brent(Fn f, T x0) {
  int power = 1, length = 1;
  T tortoise = x0, hare = f(x0);
  while (tortoise != hare) {
    if (power == length) {
      tortoise = hare;
      power *= 2;
      length = 0;
    }
    hare = f(hare);
    length++;
  }
  hare = x0;
  for (int i = 0; i < length; i++) {
    hare = f(hare);
  }
  int start = 0;
  tortoise = x0;
  while (tortoise != hare) {
    tortoise = f(tortoise);
    hare = f(hare);
    start++;
  }
  return {start, length};
}

Example Usage

#include <cassert>
#include <set>
#include <vector>
using namespace std;

int f(int x) {
  return (123 * x * x + 4567890) % 1337;
}

void verify(int x0, int start, int length) {
  set<int> s;
  int x = x0;
  for (int i = 0; i < start; i++) {
    assert(!s.count(x));
    s.insert(x);
    x = f(x);
  }
  int startx = x;
  s.clear();
  for (int i = 0; i < length; i++) {
    assert(!s.count(x));
    s.insert(x);
    x = f(x);
  }
  assert(startx == x);
}

// Given n + 1 integers within [1, n], where one value in [1, n] is present 2 or more times (but
// other values in [1, n] are present at most once), find the duplicate without modifying the array.
// Interpret each value as a pointer to the next index; the duplicate is the cycle entry.
int find_duplicate_integer(const vector<int> &nums) {
  auto [start, _] = find_cycle_floyd([&](int i) { return nums[i]; }, 0);
  int res = 0;
  for (int i = 0; i < start; i++) {
    res = nums[res];
  }
  return res;
}

int main() {
  int x0 = 0;
  auto [floyd_start, floyd_len] = find_cycle_floyd(f, x0);
  assert(floyd_start == 4 && floyd_len == 2);
  verify(x0, floyd_start, floyd_len);

  auto [brent_start, brent_len] = find_cycle_brent(f, x0);
  assert(brent_start == floyd_start && brent_len == floyd_len);

  assert(find_duplicate_integer({1, 3, 4, 2, 2}) == 2);
  assert(find_duplicate_integer({3, 1, 3, 4, 2}) == 3);
  return 0;
}

/*

Given a function $f$ mapping a finite set to itself and a starting value $x_0$, return the
position and length of the cycle reached by repeatedly applying $f$. Formally, the sequence
  $x_0, x_1 = f(x_0), x_2 = f(x_1), \ldots, x_n = f(x_{n - 1}), \ldots$
must eventually repeat some value. If $x_i = x_j$ for $i < j$, then the sequence from $x_i$ to
$x_{j - 1}$ repeats forever.

This is useful for detecting cycles in functional graphs, pseudo-random generators, Pollard rho
style algorithms, degenerate linked lists, and arrays where each index points to the next index.
For example, if `nums` has size $n + 1$ and all values are in $[1, n]$, then $f(i)$ = `nums[i]`
forms a functional graph; the duplicate value is the entry point of the cycle reached from 0.

Floyd's cycle-finding algorithm, a.k.a. the "tortoise and the hare algorithm", is a space-efficient
default choice: it moves two pointers through the sequence at different speeds, finds a meeting
point inside the cycle, then locates the cycle start. Brent's algorithm is a compact variant that
usually calls $f$ fewer times by only resetting the tortoise at powers of two; prefer it when
evaluating $f$ is expensive and constant factors matter. In the detection phase, Brent advances
one pointer with one call to $f$ per step, while Floyd advances the tortoise once and the hare
twice, using three calls to $f$ per step.

- `find_cycle_floyd(f, x0)` returns the reached cycle as a pair (`start`, `length`), where `start`
  is the smallest index $i$ such that $x_i$ is in the cycle, and `length` is the number of distinct
  values in the cycle.
- `find_cycle_brent(f, x0)` does the same with fewer calls to $f$ in many cases.

Time Complexity:
- O(m + n) per call, where $m$ is the first index in the cycle and $n$ is the cycle length.

Space Complexity:
- O(1) auxiliary.

*/

#include <utility>

template<class Fn, class T>
std::pair<int, int> find_cycle_floyd(Fn f, T x0) {
  T tortoise = f(x0), hare = f(f(x0));
  while (tortoise != hare) {
    tortoise = f(tortoise);
    hare = f(f(hare));
  }
  int start = 0;
  tortoise = x0;
  while (tortoise != hare) {
    tortoise = f(tortoise);
    hare = f(hare);
    start++;
  }
  int length = 1;
  hare = f(tortoise);
  while (tortoise != hare) {
    hare = f(hare);
    length++;
  }
  return {start, length};
}

template<class Fn, class T>
std::pair<int, int> find_cycle_brent(Fn f, T x0) {
  int power = 1, length = 1;
  T tortoise = x0, hare = f(x0);
  while (tortoise != hare) {
    if (power == length) {
      tortoise = hare;
      power *= 2;
      length = 0;
    }
    hare = f(hare);
    length++;
  }
  hare = x0;
  for (int i = 0; i < length; i++) {
    hare = f(hare);
  }
  int start = 0;
  tortoise = x0;
  while (tortoise != hare) {
    tortoise = f(tortoise);
    hare = f(hare);
    start++;
  }
  return {start, length};
}

/*** Example Usage ***/

#include <cassert>
#include <set>
#include <vector>
using namespace std;

int f(int x) {
  return (123 * x * x + 4567890) % 1337;
}

void verify(int x0, int start, int length) {
  set<int> s;
  int x = x0;
  for (int i = 0; i < start; i++) {
    assert(!s.count(x));
    s.insert(x);
    x = f(x);
  }
  int startx = x;
  s.clear();
  for (int i = 0; i < length; i++) {
    assert(!s.count(x));
    s.insert(x);
    x = f(x);
  }
  assert(startx == x);
}

// Given n + 1 integers within [1, n], where one value in [1, n] is present 2 or more times (but
// other values in [1, n] are present at most once), find the duplicate without modifying the array.
// Interpret each value as a pointer to the next index; the duplicate is the cycle entry.
int find_duplicate_integer(const vector<int> &nums) {
  auto [start, _] = find_cycle_floyd([&](int i) { return nums[i]; }, 0);
  int res = 0;
  for (int i = 0; i < start; i++) {
    res = nums[res];
  }
  return res;
}

int main() {
  int x0 = 0;
  auto [floyd_start, floyd_len] = find_cycle_floyd(f, x0);
  assert(floyd_start == 4 && floyd_len == 2);
  verify(x0, floyd_start, floyd_len);

  auto [brent_start, brent_len] = find_cycle_brent(f, x0);
  assert(brent_start == floyd_start && brent_len == floyd_len);

  assert(find_duplicate_integer({1, 3, 4, 2, 2}) == 2);
  assert(find_duplicate_integer({3, 1, 3, 4, 2}) == 3);
  return 0;
}