001
002
003
004package org.biojava.utils.automata;
005import java.util.HashSet;
006import java.util.Iterator;
007import java.util.Set;
008
009import org.biojava.bio.BioError;
010import org.biojava.bio.symbol.AlphabetManager;
011import org.biojava.bio.symbol.FiniteAlphabet;
012import org.biojava.bio.symbol.IllegalSymbolException;
013import org.biojava.bio.symbol.Symbol;
014
015/**
016 * Class for modelling non-deterministic finite automata.
017 * <p>
018 * This implementation has epsilon and lambda transitions.
019 * Both transitions are silent but the former is intended
020 * to be optimised away while the latter must be retained
021 * during optimisation.  This is necessary to implement
022 * limited closure for the REs that one may want to build
023 * with this NFA.
024 *
025 * @author David Huen
026 * @since 1.4
027 */
028public class Nfa
029    extends FiniteAutomaton
030    implements NfaBuilder
031{
032    // Used to indicate a silent transition that can be munged and optimised away.
033    static Symbol EPSILON = null;
034    // Used to indicate a silent transition that must be preserved during munging.
035    static Symbol LAMBDA = AlphabetManager.createSymbol("lambda");
036
037    public Nfa(String name, FiniteAlphabet alfa)
038    {
039        super(name, alfa);
040    }
041
042    protected int alphaIndex(Symbol sym)
043        throws IllegalSymbolException
044    {
045        if (sym == LAMBDA) return 998;
046        else
047            return super.alphaIndex(sym);
048    }
049
050    public boolean containsNode(Node node)
051    {
052        return nodes.contains(node);
053    }
054
055//    public void addNode() { super.addNode(); }
056
057    /**
058     * Add a silent optimisable transition to instance.
059     */
060    public Transition addEpsilonTransition(Node start, Node end)
061    {
062        return addTransition(start, end, EPSILON);
063    }
064
065    /**
066     * Add a silent persistent transition to instance.
067     */
068    public Transition addLambdaTransition(Node start, Node end)
069    {
070        return addTransition(start, end, LAMBDA);
071    }
072
073    /**
074     * merge all nodes linked by emission-less transitions.
075     */
076    void doEpsilonClosure()
077    {
078        // when accumulating closure set, ensure that it
079        // start and end are in there, they become the
080        // replacement.
081        Set closure = new HashSet();
082
083        boolean foundEpsilonTransitions;
084        do {
085            closure.clear();
086            foundEpsilonTransitions = false;
087            for (Iterator transI = transitions.iterator(); transI.hasNext(); ) {
088                Transition currTransition = (Transition) transI.next();
089    
090                if (currTransition.sym == EPSILON) {
091                    foundEpsilonTransitions = true;
092                    if (closure.isEmpty()) {
093                        // start a new closure
094                        closure.add(currTransition.source);
095                        closure.add(currTransition.dest);
096                    }
097                    else {
098                        // if this transition is linked with any of those I
099                        // have previously encountered, coalesce them.
100                        if ((closure.contains(currTransition.source)) ||
101                            (closure.contains(currTransition.dest))) {
102                                closure.add(currTransition.source);
103                                closure.add(currTransition.dest);
104                        }
105                    }
106                }
107            }
108
109            // found a closure
110            if (foundEpsilonTransitions) {
111                // specify the Node that will act for rest
112                // in closure set.
113                boolean containsStart = closure.contains(start);
114                boolean containsEnd = closure.contains(end);
115                if (containsStart && containsEnd) {
116                    throw new BioError("The epsilon transitions span entire model, you fool!");
117                }
118
119                Node vicar = null;
120                if (containsStart) 
121                    vicar = start;
122                else if (containsEnd)
123                    vicar = end;
124                else
125                    // silly way to have to retrieve an entry from a set....
126                    vicar = (Node) closure.iterator().next();
127                
128                replaceNode(closure, vicar);
129            }
130        }
131        while (foundEpsilonTransitions);
132    }
133
134    /**
135     * Retrieve all Nodes reachable from the specified node by
136     * emissionless lambda transitions.
137     */
138    // FIXME: take into consideration cycles of lambda transitions!
139    NodeSet getLambdaClosure(Node node)
140        throws AutomatonException
141    {
142        return _getLambdaClosure(node, createNodeSet());
143    }
144
145    private NodeSet _getLambdaClosure(Node node, NodeSet visitedNodes)
146        throws AutomatonException
147    {
148        // I've visited this one too!
149        visitedNodes.addNode(node);
150
151        NodeSet closureSet = createNodeSet();
152
153        NodeSet thisClosure = getClosure(node, LAMBDA);
154        closureSet.addNodeSet(thisClosure);
155
156        for (Iterator closI = thisClosure.iterator(); closI.hasNext(); ) {
157            Node currNode = (Node) closI.next();
158            if (visitedNodes.contains(currNode)) continue;
159            closureSet.addNodeSet(_getLambdaClosure(currNode, visitedNodes));
160        }
161
162        return closureSet;
163    }
164
165    /**
166     * goes thru data structures replacing every instance
167     * of old Node with new Node.  Duplicate entries that
168     * arise from the process are removed too.
169     * <p>
170     * The Nfa version of this method also strips
171     * epsilon self-transitions.
172     */
173    private void replaceNode(Set oldNodes, Node newNode)
174    {
175        //System.out.println("oldNodes: " + oldNodes);
176        //System.out.println("newNode:  " + newNode);
177        // prepare to replace entire contents of transitions
178        Transition [] transitionArray = new Transition[transitions.size()];
179
180        // loop thru' all transitions replacing the oldNodes
181        int j = 0;
182        for (Iterator tranI = transitions.iterator(); tranI.hasNext();) {
183            Transition currTransition = (Transition) tranI.next();
184            //System.out.println(currTransition);
185            if (oldNodes.contains(currTransition.source)) {
186                currTransition.source = newNode;
187            }
188            if (oldNodes.contains(currTransition.dest)) {
189                currTransition.dest = newNode;
190            }
191            transitionArray[j++] = currTransition;
192        }
193
194        // put back in transitions. Set behaviour will remove duplicates.
195        transitions.clear();
196        //System.out.println("j is " + j);
197        for (int i=0; i < j; i++) {
198            // put back in all non-silly transitions: epsilon self-transitions are silly.
199            Transition currTransition = transitionArray[i];
200
201            if ((currTransition.sym != EPSILON) ||
202                (currTransition.source != currTransition.dest))
203                transitions.add(currTransition);
204        }
205
206        // now clean up the nodes
207        //System.out.println("removing oldNodes");
208        for (Iterator oldI = oldNodes.iterator(); oldI.hasNext(); ) {
209            nodes.remove(oldI.next());
210        }
211
212        nodes.add(newNode);
213    }
214
215}
216