/* * Reference ETL Parser for Java * Copyright (c) 2000-2009 Constantine A Plotnikov * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without restriction, * including without limitation the rights to use, copy, modify, merge, * publish, distribute, sublicense, and/or sell copies of the Software, * and to permit persons to whom the Software is furnished to do so, * subject to the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ package net.sf.etl.parsers.internal.term_parser.flattened; import java.util.ArrayList; import java.util.Collections; import java.util.Comparator; import java.util.HashMap; import java.util.HashSet; import java.util.Iterator; import java.util.List; import java.util.Map; import java.util.Set; /** * <p> * This class represents directed acyclic graph. The implementation is biased * toward small collections and maintains transitive closure for all nodes. * </p> * * <p> * It is also impossible to remove nodes from graph. * </p> * * <p> * IMPORTANT: This class is not thread safe and it is not designed to be * reusable outside of this package. * </p> * * @author const * @param <E> * element type */ // NOTE if this would be too slow, use BitSet based implementation. Nodes are // never removed from grammar. 
// So it should not be too complex to implement.
// Each node would have four bitset that represent immediate and all parents
// and children by index.
public final class DirectedAcyclicGraph<E> {
    /** Nodes that do not have children. */
    final Set<Node<E>> leafs = new HashSet<Node<E>>();
    /** Nodes that do not have a parent. */
    final Set<Node<E>> roots = new HashSet<Node<E>>();
    /** A map from wrapped objects to their nodes. */
    final Map<E, Node<E>> objects = new HashMap<E, Node<E>>();

    /**
     * Rank comparator. It compares two nodes by rank and is used to sort
     * nodes topologically. An explicit comparison is used instead of a
     * subtraction so that the result can never be affected by integer
     * overflow.
     */
    private static final Comparator<Node<?>> RANK_COMPARATOR = new Comparator<Node<?>>() {
        public int compare(Node<?> o1, Node<?> o2) {
            if (o1.rank < o2.rank) {
                return -1;
            }
            return o1.rank == o2.rank ? 0 : 1;
        }
    };

    /**
     * Create an empty graph. The class represents a directed acyclic graph
     * and provides a number of operations on it.
     */
    public DirectedAcyclicGraph() {
        super();
    }

    /**
     * Get the node for an object. If the node already exists it is returned,
     * otherwise it is created. A freshly created node has neither parents
     * nor children, so it is registered both as a root and as a leaf.
     *
     * @param o
     *            an object to be wrapped into node
     * @return the existing or newly created node
     */
    public Node<E> getNode(E o) {
        Node<E> rc = objects.get(o);
        if (rc == null) {
            rc = new Node<E>(this, o);
            objects.put(o, rc);
            leafs.add(rc);
            roots.add(rc);
        }
        return rc;
    }

    /**
     * Minimize the amount of immediate parents and children for all nodes.
     */
    public void minimizeImmediate() {
        for (final Node<E> n : objects.values()) {
            n.minimizeImmediate();
        }
    }

    /**
     * @return nodes sorted topologically: a node always follows all of its
     *         parents because the list is ordered by rank
     */
    public List<Node<E>> topologicalSortNodes() {
        final ArrayList<Node<E>> rc = new ArrayList<Node<E>>(objects.values());
        Collections.sort(rc, RANK_COMPARATOR);
        return rc;
    }

    /**
     * @return list of wrapped objects sorted topologically (parents are
     *         first)
     */
    public List<E> topologicalSortObjects() {
        final List<E> rc = new ArrayList<E>();
        for (final Node<E> n : topologicalSortNodes()) {
            rc.add(n.value);
        }
        return rc;
    }

    /**
     * A node of the graph. It wraps a value and keeps the immediate and the
     * transitive sets of parents and children for that value.
     *
     * @param <E>
     *            element type
     */
    public static final class Node<E> {
        /** The graph that owns this node. */
        final DirectedAcyclicGraph<E> dag;
        /**
         * Node rank. It is zero for nodes without parents and otherwise
         * greater than the rank of every parent.
         */
        int rank = 0;
        /** The value wrapped into the node. */
        private final E value;
        /** A collection of immediate parents. */
        private final Set<Node<E>> immediateParents = new HashSet<Node<E>>();
        /** A collection of all (direct and indirect) parents. */
        private final Set<Node<E>> allParents = new HashSet<Node<E>>();
        /** A collection of immediate children. */
        private final Set<Node<E>> immediateChildren = new HashSet<Node<E>>();
        /** A collection of all (direct and indirect) children. */
        private final Set<Node<E>> allChildren = new HashSet<Node<E>>();

        /**
         * A constructor.
         *
         * @param dag
         *            graph that holds the node
         * @param value
         *            the value wrapped into the node
         */
        public Node(DirectedAcyclicGraph<E> dag, E value) {
            this.dag = dag;
            this.value = value;
        }

        /**
         * Minimize the number of immediate children and parents of this
         * node. An immediate link is dropped when the same node is also
         * reachable through another immediate link. This is an optimization
         * step in order not to consider indirect imports.
         */
        void minimizeImmediate() {
            // minimize children
            final HashSet<Node<E>> children = new HashSet<Node<E>>(
                    immediateChildren);
            for (final Node<E> child : immediateChildren) {
                children.removeAll(child.allChildren);
            }
            immediateChildren.retainAll(children);
            // minimize parents
            final HashSet<Node<E>> parents = new HashSet<Node<E>>(
                    immediateParents);
            for (final Node<E> parent : immediateParents) {
                parents.removeAll(parent.allParents);
            }
            immediateParents.retainAll(parents);
        }

        /**
         * Check whether the value is an immediate parent of this node. Note
         * that the node for the value is created in the graph if it does not
         * exist yet.
         *
         * @param parent
         *            a value to be checked
         * @return true if parent is an immediate parent of this node
         */
        public boolean hasImmediateParent(E parent) {
            return hasImmediateParentNode(dag.getNode(parent));
        }

        /**
         * @param parent
         *            a node to be checked
         * @return true if parent is an immediate parent of this node
         */
        public boolean hasImmediateParentNode(Node<E> parent) {
            return immediateParents.contains(parent);
        }

        /**
         * Check whether the value is an immediate child of this node. Note
         * that the node for the value is created in the graph if it does not
         * exist yet.
         *
         * @param child
         *            a value to be checked
         * @return true if child is an immediate child of this node
         */
        public boolean hasImmediateChild(E child) {
            // the children set must be consulted here; looking into
            // immediateParents would answer the opposite question
            return immediateChildren.contains(dag.getNode(child));
        }

        /**
         * Check whether the value is a direct or indirect parent of this
         * node. Note that the node for the value is created in the graph if
         * it does not exist yet.
         *
         * @param parent
         *            a value to be checked
         * @return true if parent is a direct or indirect parent of this node
         */
        public boolean hasParent(E parent) {
            return hasParentNode(dag.getNode(parent));
        }

        /**
         * @param parent
         *            a node to be checked
         * @return true if parent is a direct or indirect parent of this node
         */
        public boolean hasParentNode(Node<E> parent) {
            return allParents.contains(parent);
        }

        /**
         * Add parent node.
         *
         * @param parent
         *            a new parent for this node
         * @return true if the link exists after the call, false if adding it
         *         would have created a cycle in the graph
         */
        public boolean addParent(E parent) {
            return addParentNode(dag.getNode(parent));
        }

        /**
         * Add parent node.
         *
         * @param parent
         *            a new parent for this node
         * @return true if the link exists after the call, false if adding it
         *         would have created a cycle in the graph
         */
        public boolean addParentNode(Node<E> parent) {
            return addPair(parent, this);
        }

        /**
         * Add child node.
         *
         * @param child
         *            a new child node
         * @return true if the link exists after the call, false if adding it
         *         would have created a cycle in the graph
         */
        public boolean addChild(E child) {
            return addPair(this, dag.getNode(child));
        }

        /**
         * Add a parent/child pair to the graph and update the transitive
         * closure, the root and leaf sets and the ranks.
         *
         * @param parent
         *            a parent node
         * @param child
         *            a child node
         * @return true if the pair was created (or already existed) or false
         *         if it would have created a cycle
         */
        private boolean addPair(Node<E> parent, Node<E> child) {
            if (child == parent) {
                // self reference is the shortest possible cycle
                return false;
            }
            if (child.allChildren.contains(parent)) {
                // parent is already below child, the link would close a cycle
                return false;
            }
            if (child.immediateParents.contains(parent)) {
                // nothing to do, the link is already there
                return true;
            }
            // establish child link
            dag.leafs.remove(parent);
            parent.immediateChildren.add(child);
            parent.allChildren.add(child);
            parent.allChildren.addAll(child.allChildren);
            for (final Node<E> grandParentNode : parent.allParents) {
                grandParentNode.allChildren.add(child);
                grandParentNode.allChildren.addAll(child.allChildren);
            }
            // establish parent link
            dag.roots.remove(child);
            child.immediateParents.add(parent);
            child.allParents.add(parent);
            child.allParents.addAll(parent.allParents);
            for (final Node<E> grandChildNode : child.allChildren) {
                grandChildNode.allParents.add(parent);
                grandChildNode.allParents.addAll(parent.allParents);
            }
            // propagate rank update
            child.updateRank(parent.rank + 1);
            return true;
        }

        /**
         * Raise the rank of this node and, if it has changed, propagate the
         * change to the immediate children. The rank is never lowered.
         *
         * @param newRank
         *            the minimal rank that the node should have
         */
        private void updateRank(int newRank) {
            if (rank < newRank) {
                rank = newRank;
                for (final Node<E> n : immediateChildren) {
                    n.updateRank(rank + 1);
                }
            }
        }

        /**
         * @return Returns the value.
         */
        public E getValue() {
            return value;
        }

        /**
         * Get a read-only iterator over the values of immediate parents. The
         * iterator does not support {@link Iterator#remove()}, because
         * removing a link behind the back of the graph would leave the
         * transitive closure, the root set and the ranks inconsistent.
         *
         * @return iterator over immediate parents
         */
        public Iterator<E> immediateParentsIterator() {
            return new NodeUnwrapIterator<E>(Collections.unmodifiableSet(
                    immediateParents).iterator());
        }

        /**
         * Historical (misspelled) name of {@link #immediateParentNodes()},
         * kept for compatibility with existing callers.
         *
         * @return an unmodifiable collection of immediate parents
         */
        public Set<Node<E>> immedaiteParentNodes() {
            return Collections.unmodifiableSet(immediateParents);
        }

        /**
         * @return an unmodifiable collection of immediate parents
         */
        public Set<Node<E>> immediateParentNodes() {
            return Collections.unmodifiableSet(immediateParents);
        }
    }

    /**
     * This iterator iterates over values contained in nodes.
     *
     * @author const
     * @param <E>
     *            element type
     */
    static class NodeUnwrapIterator<E> implements Iterator<E> {
        /** iterator over collection of nodes */
        final Iterator<Node<E>> i;

        /**
         * A constructor from collection iterator
         *
         * @param i
         *            the iterator over nodes to unwrap
         */
        public NodeUnwrapIterator(Iterator<Node<E>> i) {
            super();
            this.i = i;
        }

        /**
         * {@inheritDoc}
         */
        public void remove() {
            i.remove();
        }

        /**
         * {@inheritDoc}
         */
        public boolean hasNext() {
            return i.hasNext();
        }

        /**
         * {@inheritDoc}
         */
        public E next() {
            return i.next().getValue();
        }
    }

    /**
     * This is basic definition gatherer algorithm implementation over DAG. A
     * set of abstract methods is quite ad hoc, and it will be possibly
     * refined later.
     *
     * @author const
     * @param <DefinitionHolder>
     *            a holder node that holds definitions
     * @param <DefinitionKey>
     *            key that identifies definition within holder
     * @param <Definition>
     *            definition
     */
    public static abstract class DefinitionGatherer<DefinitionHolder, DefinitionKey, Definition> {
        /**
         * Gather definitions related to definition holders. The algorithm
         * assumes that it has been already applied to all immediate parents
         * of this node in topological sort order.
         *
         * @param sourceNode
         *            a node for which definitions will be gathered.
         */
        public final void gatherDefinitions(DefinitionHolder sourceNode) {
            // get map with definitions considered native.
            final Map<DefinitionKey, Definition> existingDefinitions = definitionMap(sourceNode);
            // this map contains definitions gathered from parent holders
            final Map<DefinitionKey, HashSet<Definition>> allDefinitions = new HashMap<DefinitionKey, HashSet<Definition>>();
            // gather candidates from all immediate parents.
            for (final Iterator<DefinitionHolder> i = getHolderNode(sourceNode)
                    .immediateParentsIterator(); i.hasNext();) {
                final DefinitionHolder parentHolder = i.next();
                for (final Definition definitionFromParent : definitionMap(
                        parentHolder).values()) {
                    final DefinitionKey definitionFromParentKey = definitionKey(definitionFromParent);
                    // any processing is done only if the holder does not
                    // already have a definition with this key
                    if (existingDefinitions
                            .containsKey(definitionFromParentKey)) {
                        continue;
                    }
                    HashSet<Definition> definitions = allDefinitions
                            .get(definitionFromParentKey);
                    if (definitions == null) {
                        definitions = new HashSet<Definition>();
                        allDefinitions.put(definitionFromParentKey,
                                definitions);
                    }
                    // definition is ignored if it is already available by
                    // some other path
                    if (!definitions.contains(definitionFromParent)) {
                        addUnlessHidden(definitions, definitionFromParent);
                    }
                }
            }
            // see if there is any conflict and add good imports
            for (final Map.Entry<DefinitionKey, HashSet<Definition>> e : allDefinitions
                    .entrySet()) {
                final HashSet<Definition> v = e.getValue();
                if (v.size() != 1) {
                    reportDuplicates(sourceNode, e.getKey(), v);
                }
                // reportDuplicates() is allowed to remove objects from the
                // set; if nothing is left there is nothing to include
                if (v.isEmpty()) {
                    continue;
                }
                // add arbitrary definition for the set. Note that
                // reportDuplicates() method has a chance to resolve conflict.
                existingDefinitions.put(e.getKey(), includingDefinition(
                        sourceNode, v.iterator().next()));
            }
        }

        /**
         * Add a candidate to the set of definitions gathered for one key
         * unless it is hidden by a definition that is already in the set.
         * Definitions from the set that are hidden by the candidate are
         * removed while the set is scanned.
         *
         * @param definitions
         *            the definitions gathered so far for the key
         * @param candidate
         *            the definition that comes from a parent holder
         */
        private void addUnlessHidden(HashSet<Definition> definitions,
                Definition candidate) {
            for (final Iterator<Definition> k = definitions.iterator(); k
                    .hasNext();) {
                final Definition existingDefinition = k.next();
                if (definitionNode(existingDefinition).hasParentNode(
                        definitionNode(candidate))) {
                    // new definition is hidden by the definition that is
                    // already in the set, so the new definition is ignored
                    return;
                } else if (definitionNode(candidate).hasParentNode(
                        definitionNode(existingDefinition))) {
                    // new definition hides the definition from the set, the
                    // existing definition is removed.
                    k.remove();
                }
            }
            // no definition that hides the candidate has been detected
            definitions.add(candidate);
        }

        /**
         * In case if definition is wrapped when it is added to the all
         * definitions map, this method allows to find original definition in
         * defining context.
         *
         * @param def
         *            a potentially wrapped definition
         * @return an original definition that was wrapped.
         */
        protected Definition originalDefinition(Definition def) {
            return def;
        }

        /**
         * When object is included from parent holder, this callback method
         * gives subclasses a chance to perform an additional processing on
         * the node or to replace it with derived node.
         *
         * @param sourceHolder
         *            a new holder for the definition
         * @param object
         *            an object to process
         * @return a processed object
         */
        protected Definition includingDefinition(DefinitionHolder sourceHolder,
                Definition object) {
            return object;
        }

        /**
         * Report problem with definitions. The method checks if there is an
         * actual conflict (for example if definitions are the same, there is
         * no conflict). The method also has a chance to resolve conflict by
         * removing objects from set. If the set is left empty, no definition
         * is included for the key.
         *
         * @param sourceHolder
         *            a source holder for definition.
         * @param key
         *            a key for which conflict exists
         * @param duplicateNodes
         *            a set of duplicate definitions
         */
        protected abstract void reportDuplicates(DefinitionHolder sourceHolder,
                DefinitionKey key, HashSet<Definition> duplicateNodes);

        /**
         * Get a DAG node for definition holder
         *
         * @param definitionHolder
         *            a definition holder
         * @return an actual node that contains definition
         */
        protected abstract Node<DefinitionHolder> getHolderNode(
                DefinitionHolder definitionHolder);

        /**
         * Get the defining DAG node for definition
         *
         * @param definition
         *            a definition to examine
         * @return the defining DAG node for definition
         */
        protected abstract Node<DefinitionHolder> definitionNode(
                Definition definition);

        /**
         * Get key of the definition
         *
         * @param definition
         *            a definition to examine
         * @return the key that identifies definition
         */
        protected abstract DefinitionKey definitionKey(Definition definition);

        /**
         * Get the map that contains definitions that are directly contained
         * by the definition holder object. The gathering algorithm puts the
         * included definitions into the returned map, so the map must be
         * modifiable.
         *
         * @param holder
         *            a holder object to examine
         * @return the map of immediate definitions.
         */
        protected abstract Map<DefinitionKey, Definition> definitionMap(
                DefinitionHolder holder);
    }

    /**
     * Base class for import definition gatherer. It adds standard error
     * reporting mechanism
     *
     * @author const
     * @param <DefinitionHolder>
     *            a holder node that holds definitions
     * @param <DefinitionKey>
     *            key that identifies definition within holder
     * @param <Definition>
     *            definition
     * @param <ImportedObject>
     *            an object imported though definition
     */
    public static abstract class ImportDefinitionGatherer<DefinitionHolder, DefinitionKey, Definition, ImportedObject>
            extends
            DefinitionGatherer<DefinitionHolder, DefinitionKey, Definition> {
        /**
         * {@inheritDoc}
         *
         * In case of imports there is no conflict if all imports point to
         * the same place. Imported objects are compared by identity, and a
         * null imported object is treated as "nothing seen yet".
         */
        @Override
        protected final void reportDuplicates(DefinitionHolder sourceHolder,
                DefinitionKey key, HashSet<Definition> duplicateNodes) {
            ImportedObject importedObject = null;
            for (final Definition gi : duplicateNodes) {
                if (importedObject == null) {
                    importedObject = importedObject(gi);
                } else if (importedObject != importedObject(gi)) {
                    // error is only reported if imports are pointing to
                    // different locations, and it is reported only once
                    reportDuplicateImportError(sourceHolder, key);
                    break;
                }
            }
        }

        /**
         * This method is used to report duplicates
         *
         * @param sourceHolder
         *            definition holder node
         * @param key
         *            a key for which error happened
         */
        protected abstract void reportDuplicateImportError(
                DefinitionHolder sourceHolder, DefinitionKey key);

        /**
         * @param importDefinition
         *            an import definition
         * @return an object that is being imported by this definition
         */
        protected abstract ImportedObject importedObject(
                Definition importDefinition);
    }
}