001/*
002 * Copyright (C) 2017 The Guava Authors
003 *
004 * Licensed under the Apache License, Version 2.0 (the "License");
005 * you may not use this file except in compliance with the License.
006 * You may obtain a copy of the License at
007 *
008 * http://www.apache.org/licenses/LICENSE-2.0
009 *
010 * Unless required by applicable law or agreed to in writing, software
011 * distributed under the License is distributed on an "AS IS" BASIS,
012 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
013 * See the License for the specific language governing permissions and
014 * limitations under the License.
015 */
016
017package com.google.common.graph;
018
019import static com.google.common.base.Preconditions.checkArgument;
020import static com.google.common.base.Preconditions.checkNotNull;
021
022import com.google.common.annotations.Beta;
023import com.google.common.collect.AbstractIterator;
024import com.google.common.collect.ImmutableSet;
025
026import java.util.ArrayDeque;
027import java.util.Deque;
028import java.util.HashSet;
029import java.util.Iterator;
030import java.util.Set;
031
032
033/**
034 * An object that can traverse the nodes that are reachable from a specified (set of) start node(s)
035 * using a specified {@link SuccessorsFunction}.
036 *
037 * <p>There are two entry points for creating a {@code Traverser}: {@link
038 * #forTree(SuccessorsFunction)} and {@link #forGraph(SuccessorsFunction)}. You should choose one
039 * based on your answers to the following questions:
040 *
041 * <ol>
042 *   <li>Is there only one path to any node that's reachable from any start node? (If so, the graph
043 *       to be traversed is a tree or forest even if it is a subgraph of a graph which is neither.)
044 *   <li>Are the node objects' implementations of {@code equals()}/{@code hashCode()} <a
045 *       href="https://github.com/google/guava/wiki/GraphsExplained#non-recursiveness">recursive</a>?
046 * </ol>
047 *
048 * <p>If your answers are:
049 *
050 * <ul>
051 *   <li>(1) "no" and (2) "no", use {@link #forGraph(SuccessorsFunction)}.
052 *   <li>(1) "yes" and (2) "yes", use {@link #forTree(SuccessorsFunction)}.
053 *   <li>(1) "yes" and (2) "no", you can use either, but {@code forTree()} will be more efficient.
054 *   <li>(1) "no" and (2) "yes", <b><i>neither will work</i></b>, but if you transform your node
055 *       objects into a non-recursive form, you can use {@code forGraph()}.
056 * </ul>
057 *
058 * @author Jens Nyman
059 * @param <N> Node parameter type
060 * @since 23.1
061 */
062@Beta
063
064public abstract class Traverser<N> {
065  private final SuccessorsFunction<N> successorFunction;
066
067  private Traverser(SuccessorsFunction<N> successorFunction) {
068    this.successorFunction = checkNotNull(successorFunction);
069  }
070
071  /**
072   * Creates a new traverser for the given general {@code graph}.
073   *
074   * <p>Traversers created using this method are guaranteed to visit each node reachable from the
075   * start node(s) at most once.
076   *
077   * <p>If you know that no node in {@code graph} is reachable by more than one path from the start
078   * node(s), consider using {@link #forTree(SuccessorsFunction)} instead.
079   *
080   * <p><b>Performance notes</b>
081   *
082   * <ul>
083   *   <li>Traversals require <i>O(n)</i> time (where <i>n</i> is the number of nodes reachable from
084   *       the start node), assuming that the node objects have <i>O(1)</i> {@code equals()} and
085   *       {@code hashCode()} implementations. (See the <a
086   *       href="https://github.com/google/guava/wiki/GraphsExplained#elements-must-be-useable-as-map-keys">
087   *       notes on element objects</a> for more information.)
088   *   <li>While traversing, the traverser will use <i>O(n)</i> space (where <i>n</i> is the number
089   *       of nodes that have thus far been visited), plus <i>O(H)</i> space (where <i>H</i> is the
090   *       number of nodes that have been seen but not yet visited, that is, the "horizon").
091   * </ul>
092   *
093   * @param graph {@link SuccessorsFunction} representing a general graph that may have cycles.
094   */
095  public static <N> Traverser<N> forGraph(final SuccessorsFunction<N> graph) {
096    return new Traverser<N>(graph) {
097      @Override
098      Traversal<N> newTraversal() {
099        return Traversal.inGraph(graph);
100      }
101    };
102  }
103
104  /**
105   * Creates a new traverser for a directed acyclic graph that has at most one path from the start
106   * node(s) to any node reachable from the start node(s), and has no paths from any start node to
107   * any other start node, such as a tree or forest.
108   *
109   * <p>{@code forTree()} is especially useful (versus {@code forGraph()}) in cases where the data
110   * structure being traversed is, in addition to being a tree/forest, also defined <a
111   * href="https://github.com/google/guava/wiki/GraphsExplained#non-recursiveness">recursively</a>.
112   * This is because the {@code forTree()}-based implementations don't keep track of visited nodes,
113   * and therefore don't need to call `equals()` or `hashCode()` on the node objects; this saves
114   * both time and space versus traversing the same graph using {@code forGraph()}.
115   *
116   * <p>Providing a graph to be traversed for which there is more than one path from the start
117   * node(s) to any node may lead to:
118   *
119   * <ul>
120   *   <li>Traversal not terminating (if the graph has cycles)
121   *   <li>Nodes being visited multiple times (if multiple paths exist from any start node to any
122   *       node reachable from any start node)
123   * </ul>
124   *
125   * <p><b>Performance notes</b>
126   *
127   * <ul>
128   *   <li>Traversals require <i>O(n)</i> time (where <i>n</i> is the number of nodes reachable from
129   *       the start node).
130   *   <li>While traversing, the traverser will use <i>O(H)</i> space (where <i>H</i> is the number
131   *       of nodes that have been seen but not yet visited, that is, the "horizon").
132   * </ul>
133   *
134   * <p><b>Examples</b> (all edges are directed facing downwards)
135   *
136   * <p>The graph below would be valid input with start nodes of {@code a, f, c}. However, if {@code
137   * b} were <i>also</i> a start node, then there would be multiple paths to reach {@code e} and
138   * {@code h}.
139   *
140   * <pre>{@code
141   *    a     b      c
142   *   / \   / \     |
143   *  /   \ /   \    |
144   * d     e     f   g
145   *       |
146   *       |
147   *       h
148   * }</pre>
149   *
150   * <p>.
151   *
152   * <p>The graph below would be a valid input with start nodes of {@code a, f}. However, if {@code
153   * b} were a start node, there would be multiple paths to {@code f}.
154   *
155   * <pre>{@code
156   *    a     b
157   *   / \   / \
158   *  /   \ /   \
159   * c     d     e
160   *        \   /
161   *         \ /
162   *          f
163   * }</pre>
164   *
165   * <p><b>Note on binary trees</b>
166   *
167   * <p>This method can be used to traverse over a binary tree. Given methods {@code
168   * leftChild(node)} and {@code rightChild(node)}, this method can be called as
169   *
170   * <pre>{@code
171   * Traverser.forTree(node -> ImmutableList.of(leftChild(node), rightChild(node)));
172   * }</pre>
173   *
174   * @param tree {@link SuccessorsFunction} representing a directed acyclic graph that has at most
175   *     one path between any two nodes
176   */
177  public static <N> Traverser<N> forTree(final SuccessorsFunction<N> tree) {
178    if (tree instanceof BaseGraph) {
179      checkArgument(((BaseGraph<?>) tree).isDirected(), "Undirected graphs can never be trees.");
180    }
181    if (tree instanceof Network) {
182      checkArgument(((Network<?, ?>) tree).isDirected(), "Undirected networks can never be trees.");
183    }
184    return new Traverser<N>(tree) {
185      @Override
186      Traversal<N> newTraversal() {
187        return Traversal.inTree(tree);
188      }
189    };
190  }
191
192  /**
193   * Returns an unmodifiable {@code Iterable} over the nodes reachable from {@code startNode}, in
194   * the order of a breadth-first traversal. That is, all the nodes of depth 0 are returned, then
195   * depth 1, then 2, and so on.
196   *
197   * <p><b>Example:</b> The following graph with {@code startNode} {@code a} would return nodes in
198   * the order {@code abcdef} (assuming successors are returned in alphabetical order).
199   *
200   * <pre>{@code
201   * b ---- a ---- d
202   * |      |
203   * |      |
204   * e ---- c ---- f
205   * }</pre>
206   *
207   * <p>The behavior of this method is undefined if the nodes, or the topology of the graph, change
208   * while iteration is in progress.
209   *
210   * <p>The returned {@code Iterable} can be iterated over multiple times. Every iterator will
211   * compute its next element on the fly. It is thus possible to limit the traversal to a certain
212   * number of nodes as follows:
213   *
214   * <pre>{@code
215   * Iterables.limit(Traverser.forGraph(graph).breadthFirst(node), maxNumberOfNodes);
216   * }</pre>
217   *
218   * <p>See <a href="https://en.wikipedia.org/wiki/Breadth-first_search">Wikipedia</a> for more
219   * info.
220   *
221   * @throws IllegalArgumentException if {@code startNode} is not an element of the graph
222   */
223  public final Iterable<N> breadthFirst(N startNode) {
224    return breadthFirst(ImmutableSet.of(startNode));
225  }
226
227  /**
228   * Returns an unmodifiable {@code Iterable} over the nodes reachable from any of the {@code
229   * startNodes}, in the order of a breadth-first traversal. This is equivalent to a breadth-first
230   * traversal of a graph with an additional root node whose successors are the listed {@code
231   * startNodes}.
232   *
233   * @throws IllegalArgumentException if any of {@code startNodes} is not an element of the graph
234   * @see #breadthFirst(Object)
235   * @since 24.1
236   */
237  public final Iterable<N> breadthFirst(Iterable<? extends N> startNodes) {
238    final ImmutableSet<N> validated = validate(startNodes);
239    return new Iterable<N>() {
240      @Override
241      public Iterator<N> iterator() {
242        return newTraversal().breadthFirst(validated.iterator());
243      }
244    };
245  }
246
247  /**
248   * Returns an unmodifiable {@code Iterable} over the nodes reachable from {@code startNode}, in
249   * the order of a depth-first pre-order traversal. "Pre-order" implies that nodes appear in the
250   * {@code Iterable} in the order in which they are first visited.
251   *
252   * <p><b>Example:</b> The following graph with {@code startNode} {@code a} would return nodes in
253   * the order {@code abecfd} (assuming successors are returned in alphabetical order).
254   *
255   * <pre>{@code
256   * b ---- a ---- d
257   * |      |
258   * |      |
259   * e ---- c ---- f
260   * }</pre>
261   *
262   * <p>The behavior of this method is undefined if the nodes, or the topology of the graph, change
263   * while iteration is in progress.
264   *
265   * <p>The returned {@code Iterable} can be iterated over multiple times. Every iterator will
266   * compute its next element on the fly. It is thus possible to limit the traversal to a certain
267   * number of nodes as follows:
268   *
269   * <pre>{@code
270   * Iterables.limit(
271   *     Traverser.forGraph(graph).depthFirstPreOrder(node), maxNumberOfNodes);
272   * }</pre>
273   *
274   * <p>See <a href="https://en.wikipedia.org/wiki/Depth-first_search">Wikipedia</a> for more info.
275   *
276   * @throws IllegalArgumentException if {@code startNode} is not an element of the graph
277   */
278  public final Iterable<N> depthFirstPreOrder(N startNode) {
279    return depthFirstPreOrder(ImmutableSet.of(startNode));
280  }
281
282  /**
283   * Returns an unmodifiable {@code Iterable} over the nodes reachable from any of the {@code
284   * startNodes}, in the order of a depth-first pre-order traversal. This is equivalent to a
285   * depth-first pre-order traversal of a graph with an additional root node whose successors are
286   * the listed {@code startNodes}.
287   *
288   * @throws IllegalArgumentException if any of {@code startNodes} is not an element of the graph
289   * @see #depthFirstPreOrder(Object)
290   * @since 24.1
291   */
292  public final Iterable<N> depthFirstPreOrder(Iterable<? extends N> startNodes) {
293    final ImmutableSet<N> validated = validate(startNodes);
294    return new Iterable<N>() {
295      @Override
296      public Iterator<N> iterator() {
297        return newTraversal().preOrder(validated.iterator());
298      }
299    };
300  }
301
302  /**
303   * Returns an unmodifiable {@code Iterable} over the nodes reachable from {@code startNode}, in
304   * the order of a depth-first post-order traversal. "Post-order" implies that nodes appear in the
305   * {@code Iterable} in the order in which they are visited for the last time.
306   *
307   * <p><b>Example:</b> The following graph with {@code startNode} {@code a} would return nodes in
308   * the order {@code fcebda} (assuming successors are returned in alphabetical order).
309   *
310   * <pre>{@code
311   * b ---- a ---- d
312   * |      |
313   * |      |
314   * e ---- c ---- f
315   * }</pre>
316   *
317   * <p>The behavior of this method is undefined if the nodes, or the topology of the graph, change
318   * while iteration is in progress.
319   *
320   * <p>The returned {@code Iterable} can be iterated over multiple times. Every iterator will
321   * compute its next element on the fly. It is thus possible to limit the traversal to a certain
322   * number of nodes as follows:
323   *
324   * <pre>{@code
325   * Iterables.limit(
326   *     Traverser.forGraph(graph).depthFirstPostOrder(node), maxNumberOfNodes);
327   * }</pre>
328   *
329   * <p>See <a href="https://en.wikipedia.org/wiki/Depth-first_search">Wikipedia</a> for more info.
330   *
331   * @throws IllegalArgumentException if {@code startNode} is not an element of the graph
332   */
333  public final Iterable<N> depthFirstPostOrder(N startNode) {
334    return depthFirstPostOrder(ImmutableSet.of(startNode));
335  }
336
337  /**
338   * Returns an unmodifiable {@code Iterable} over the nodes reachable from any of the {@code
339   * startNodes}, in the order of a depth-first post-order traversal. This is equivalent to a
340   * depth-first post-order traversal of a graph with an additional root node whose successors are
341   * the listed {@code startNodes}.
342   *
343   * @throws IllegalArgumentException if any of {@code startNodes} is not an element of the graph
344   * @see #depthFirstPostOrder(Object)
345   * @since 24.1
346   */
347  public final Iterable<N> depthFirstPostOrder(Iterable<? extends N> startNodes) {
348    final ImmutableSet<N> validated = validate(startNodes);
349    return new Iterable<N>() {
350      @Override
351      public Iterator<N> iterator() {
352        return newTraversal().postOrder(validated.iterator());
353      }
354    };
355  }
356
357  abstract Traversal<N> newTraversal();
358
359  @SuppressWarnings("CheckReturnValue")
360  private ImmutableSet<N> validate(Iterable<? extends N> startNodes) {
361    ImmutableSet<N> copy = ImmutableSet.copyOf(startNodes);
362    for (N node : copy) {
363      successorFunction.successors(node); // Will throw if node doesn't exist
364    }
365    return copy;
366  }
367
368  /**
369   * Abstracts away the difference between traversing a graph vs. a tree. For a tree, we just take
370   * the next element from the next non-empty iterator; for graph, we need to loop through the next
371   * non-empty iterator to find first unvisited node.
372   */
373  private abstract static class Traversal<N> {
374    final SuccessorsFunction<N> successorFunction;
375
376    Traversal(SuccessorsFunction<N> successorFunction) {
377      this.successorFunction = successorFunction;
378    }
379
380    static <N> Traversal<N> inGraph(SuccessorsFunction<N> graph) {
381      final Set<N> visited = new HashSet<>();
382      return new Traversal<N>(graph) {
383        @Override
384        N visitNext(Deque<Iterator<? extends N>> horizon) {
385          Iterator<? extends N> top = horizon.getFirst();
386          while (top.hasNext()) {
387            N element = checkNotNull(top.next());
388            if (visited.add(element)) {
389              return element;
390            }
391          }
392          horizon.removeFirst();
393          return null;
394        }
395      };
396    }
397
398    static <N> Traversal<N> inTree(SuccessorsFunction<N> tree) {
399      return new Traversal<N>(tree) {
400        @Override
401        N visitNext(Deque<Iterator<? extends N>> horizon) {
402          Iterator<? extends N> top = horizon.getFirst();
403          if (top.hasNext()) {
404            return checkNotNull(top.next());
405          }
406          horizon.removeFirst();
407          return null;
408        }
409      };
410    }
411
412    final Iterator<N> breadthFirst(Iterator<? extends N> startNodes) {
413      return topDown(startNodes, InsertionOrder.BACK);
414    }
415
416    final Iterator<N> preOrder(Iterator<? extends N> startNodes) {
417      return topDown(startNodes, InsertionOrder.FRONT);
418    }
419
420    /**
421     * In top-down traversal, an ancestor node is always traversed before any of its descendant
422     * nodes. The traversal order among descendant nodes (particularly aunts and nieces) are
423     * determined by the {@code InsertionOrder} parameter: nieces are placed at the FRONT before
424     * aunts for pre-order; while in BFS they are placed at the BACK after aunts.
425     */
426    private Iterator<N> topDown(Iterator<? extends N> startNodes, final InsertionOrder order) {
427      final Deque<Iterator<? extends N>> horizon = new ArrayDeque<>();
428      horizon.add(startNodes);
429      return new AbstractIterator<N>() {
430        @Override
431        protected N computeNext() {
432          do {
433            N next = visitNext(horizon);
434            if (next != null) {
435              Iterator<? extends N> successors = successorFunction.successors(next).iterator();
436              if (successors.hasNext()) {
437                // BFS: horizon.addLast(successors)
438                // Pre-order: horizon.addFirst(successors)
439                order.insertInto(horizon, successors);
440              }
441              return next;
442            }
443          } while (!horizon.isEmpty());
444          return endOfData();
445        }
446      };
447    }
448
449    final Iterator<N> postOrder(Iterator<? extends N> startNodes) {
450      final Deque<N> ancestorStack = new ArrayDeque<>();
451      final Deque<Iterator<? extends N>> horizon = new ArrayDeque<>();
452      horizon.add(startNodes);
453      return new AbstractIterator<N>() {
454        @Override
455        protected N computeNext() {
456          for (N next = visitNext(horizon); next != null; next = visitNext(horizon)) {
457            Iterator<? extends N> successors = successorFunction.successors(next).iterator();
458            if (!successors.hasNext()) {
459              return next;
460            }
461            horizon.addFirst(successors);
462            ancestorStack.push(next);
463          }
464          return ancestorStack.isEmpty() ? endOfData() : ancestorStack.pop();
465        }
466      };
467    }
468
469    /**
470     * Visits the next node from the top iterator of {@code horizon} and returns the visited node.
471     * Null is returned to indicate reaching the end of the top iterator.
472     *
473     * <p>For example, if horizon is {@code [[a, b], [c, d], [e]]}, {@code visitNext()} will return
474     * {@code [a, b, null, c, d, null, e, null]} sequentially, encoding the topological structure.
475     * (Note, however, that the callers of {@code visitNext()} often insert additional iterators
476     * into {@code horizon} between calls to {@code visitNext()}. This causes them to receive
477     * additional values interleaved with those shown above.)
478     */
479    
480    abstract N visitNext(Deque<Iterator<? extends N>> horizon);
481  }
482
483  /** Poor man's method reference for {@code Deque::addFirst} and {@code Deque::addLast}. */
484  private enum InsertionOrder {
485    FRONT {
486      @Override
487      <T> void insertInto(Deque<T> deque, T value) {
488        deque.addFirst(value);
489      }
490    },
491    BACK {
492      @Override
493      <T> void insertInto(Deque<T> deque, T value) {
494        deque.addLast(value);
495      }
496    };
497
498    abstract <T> void insertInto(Deque<T> deque, T value);
499  }
500}