001/*
002 *                    BioJava development code
003 *
004 * This code may be freely distributed and modified under the
005 * terms of the GNU Lesser General Public Licence.  This should
006 * be distributed with the code.  If you do not have a copy,
007 * see:
008 *
009 *      http://www.gnu.org/copyleft/lesser.html
010 *
011 * Copyright for this code is held jointly by the individual
012 * authors.  These should be listed in @author doc comments.
013 *
014 * For more information on the BioJava project and its aims,
015 * or to join the biojava-l mailing list, visit the home page
016 * at:
017 *
018 *      http://www.biojava.org/
019 *
020 */
021
022package org.biojava.utils.automata;
023import java.util.Collections;
024import java.util.HashMap;
025import java.util.HashSet;
026import java.util.Iterator;
027import java.util.Map;
028import java.util.Set;
029
030import org.biojava.bio.BioError;
031import org.biojava.bio.BioException;
032import org.biojava.bio.seq.io.SymbolTokenization;
033import org.biojava.bio.symbol.AlphabetIndex;
034import org.biojava.bio.symbol.AlphabetManager;
035import org.biojava.bio.symbol.FiniteAlphabet;
036import org.biojava.bio.symbol.IllegalSymbolException;
037import org.biojava.bio.symbol.Symbol;
038
039/**
040 * Class for modelling finite automata.
041 * <p>
042 * This class models basic FA behaviour.  More specialised
043 * behaviour is implemented by subclassing this.
044 *
045 * @author David Huen
046 * @since 1.4
047 */
048public class FiniteAutomaton
049{
050    /**
051     * describes a Node in this finite automaton.
052     */
053    class Node
054    {
055        private int nodeID;
056
057        /**
058         * Nodes are assigned nodeIDs sequentially.
059         * Terminal nodes are assigned negative
060         * Node IDs.
061         */
062        protected Node(boolean terminal) 
063        { 
064            if (terminal)
065                nodeID = -(nodeCount++);
066            else
067                nodeID = nodeCount++; 
068        }
069
070        protected Node(int nodeID)
071        {
072            this.nodeID = nodeID;
073        }
074
075        /**
076         * is this a terminal Node?
077         */
078        boolean isTerminal()
079        {
080            return nodeID < 0;
081        }
082
083        /**
084         * returns the Node ID.
085         */
086        int getID() { return nodeID; }
087
088        /**
089         * Two Nodes are equal if they share the same
090         * parent and have the same node ID.
091         */
092        public boolean equals(Object o)
093        {
094            if (!(o instanceof Node))
095                return false;
096
097            // check that outer class is the same!
098            Node other = (Node) o;
099
100            if (other.parent() != parent())
101                return false;
102
103            if ((other.getID() != getID()))
104                return true;
105            else
106                return false;
107        }
108
109        public FiniteAutomaton parent() { return FiniteAutomaton.this; }
110        public int hashCode() { return nodeID; }
111        public String toString() { return "Node: " + nodeID; }
112    }
113
114    /**
115     * a class that represents a Set of Nodes.
116     */
117    class NodeSet
118        extends HashSet
119    {
120        /**
121         * Adds a Node to this NodeSet.  The Node and the NodeSet
122         * to which it is added must be from the same FiniteAutomaton
123         * instance.
124         */
125        void addNode(Node node)
126            throws AutomatonException
127        {
128            if (!(nodes.contains(node)))
129                throw new AutomatonException("attempt to add foreign Node to this NodeSet.");
130
131            add(node);
132        }
133
134        void addNodeSet(NodeSet otherSet)
135            throws AutomatonException
136        {
137            // it will be assumed that the other NodeSet is legit
138            // if it comes from this FiniteAutomaton.
139            if (otherSet.parent() != FiniteAutomaton.this)
140                throw new AutomatonException("attempt to add foreign NodeSet to this NodeSet.");
141
142            addAll(otherSet);
143        }
144
145        /**
146         * Equality implies that both Objects are NodeSets,
147         * both from the same FiniteAutomaton instance and
148         * have the same Nodes within them.
149         */
150        public boolean equals(Object o)
151        {
152            // only NodeSets can be compared.
153            if (!(o instanceof NodeSet))
154                return false;
155
156            // Contents of NodeSets must be from same model.
157            NodeSet other = (NodeSet) o;
158            if (parent() != other.parent())
159                return false;
160
161            // sets themselves must be equal
162            //System.out.println(toString() + " equals? " + other.toString());
163            return super.equals(o);
164        }
165
166        /**
167         * The hashCode is currently the sum of Node hashCodes.
168         */
169        public int hashCode()
170        {
171            int sum = 0;
172
173            for (Iterator setI = iterator(); setI.hasNext(); ) {
174                sum += setI.next().hashCode();
175            }
176
177            return sum;
178        }
179
180        /**
181         * Returns the parental FiniteAutomaton that
182         * this NodeSet is a child of.
183         */
184        FiniteAutomaton parent()
185        {
186            return FiniteAutomaton.this;
187        }
188
189        public String toString()
190        {
191            StringBuffer output = new StringBuffer();
192
193            // dump nodes
194            output.append("[");
195            int count = 0;
196            for (Iterator nodeI = iterator(); nodeI.hasNext(); ) {
197                Node node = (Node) nodeI.next();
198                if (count++ != 0) output.append(", ");
199                output.append(node.getID());
200            }
201            output.append("]");
202
203            return output.toString();
204        }
205    }
206
207    /**
208     * Models a Transition within the FiniteAutomaton.
209     */
210    class Transition
211    {
212        protected Node source;
213        protected Node dest;
214        protected Symbol sym;
215
216        /**
217         * @param source The source Node of the transition.
218         * @param dest The destination Node of the transition.
219         * @param sym The Symbol that invokes the transition.
220         */
221        private Transition(Node source, Node dest, Symbol sym)
222        {
223            this.source = source;
224            this.dest = dest;
225            this.sym = sym;
226        }
227
228        Node getSource() { return source; }
229        Node getDest() { return dest; }
230        Symbol getSymbol() { return sym; }
231
232        /**
233         * Sets the source Node.
234         */
235        void setSource(Node source)
236            throws AutomatonException
237        {
238            if (source.parent() != FiniteAutomaton.this)
239                throw new AutomatonException("Attempt to Node from one Model in another.");
240
241            this.source = source;
242        }
243
244        /**
245         * Sets the destination Node.
246         */
247        void setDest(Node dest)
248            throws AutomatonException
249        {
250            if (dest.parent() != FiniteAutomaton.this)
251                throw new AutomatonException("Attempt to Node from one Model in another.");
252
253            this.dest = dest;
254        }
255
256        /**
257         * Two Transitions are equal if they are both
258         * defined on the same FiniteAutomaton instance
259         * and their source and destination Nodes and associated
260         * Symbols are equal.
261         */
262        public boolean equals(Object o)
263        {
264            if (!(o instanceof Transition))
265                return false;
266
267            // check that outer class is the same!
268            Transition other = (Transition) o;
269
270            if (other.parent() != parent())
271                return false;
272
273            if ((source.equals(other.source)) &&
274                (dest.equals(other.dest)) &&
275                (sym == other.getSymbol()))
276                return true;
277            else
278                return false;
279        }
280
281        public FiniteAutomaton parent() { return FiniteAutomaton.this; }
282
283        public int hashCode() 
284        { 
285            try {
286                return (source.getID() << 20) + (dest.getID() << 10) 
287                    + alphaIndex(sym); 
288            }
289            catch (IllegalSymbolException ise) {
290                throw new BioError("Fatal error: Unexpected IllegalSymbolException on computing indexForSymbol.");
291            }
292        }
293
294        public String toString()
295        {
296            StringBuffer output = new StringBuffer();
297            try {
298                SymbolTokenization toke = alfa.getTokenization("token");
299                output.append(source.getID());
300                output.append("\t");
301                output.append(dest.getID());
302                output.append("\t");
303                output.append((sym == null)?"EPSILON":toke.tokenizeSymbol(sym));
304                return output.toString();
305            }
306            catch (IllegalSymbolException ise) {
307                throw new AssertionError(ise);
308            }
309            catch (BioException be) {
310                throw new AssertionError(be);
311            }
312        }
313    }
314
315    private FiniteAlphabet alfa;
316    private AlphabetIndex alfaIdx;
317    private String name;
318
319    // private data storage
320    /**
321     * User Nodes are assigned Node IDs of 2 and larger.
322     */
323    private int nodeCount = 2;
324    protected Set nodes = new HashSet();
325    protected Set transitions = new HashSet();
326
327    private Map transitionMap = new HashMap();
328
329    private int START = 0;
330    private int END = -1;
331    protected Node start;
332    protected Node end;
333
334    FiniteAutomaton(String name, FiniteAlphabet alfa)
335    {
336        this.alfa = alfa;
337        this.name = name;
338        alfaIdx = AlphabetManager.getAlphabetIndex(alfa);
339
340        // ensure that the start and end are created
341        // end stored in the node structures.
342        nodes.add(start = new Node(START));
343        nodes.add(end = new Node(END));
344    }
345
346    FiniteAlphabet getAlphabet() { return alfa; }
347    String getName() { return name; }
348    public Node getStart() { return start; }
349    public Node getEnd() { return end; }
350
351    public FiniteAutomaton getAutomaton()
352    {
353        return this;
354    }
355
356    public Transition addTransition(Node start, Node end, Symbol sym)
357    {
358        Transition newTransition = new Transition(start, end, sym);
359        transitions.add(newTransition);
360        return newTransition;
361    }
362
363    /**
364     * Add a node to the FA.
365     * @param terminal Is the Node terminal?
366     */
367    public Node addNode(boolean terminal)
368    {
369        Node node = new Node(terminal);
370        nodes.add(node);
371        return node;
372    }
373
374    int nodeCount() { return nodes.size(); }
375
376    /**
377     * get all Nodes within this instance.
378     */
379    public NodeSet getNodes()
380    {
381        NodeSet nodeSet = createNodeSet();
382        nodeSet.addAll(nodes);
383        return nodeSet;
384    }
385
386    /**
387     * What is the Set of Nodes reachable from source
388     * Node by transitions associated with Symbol.
389     */
390    NodeSet getClosure(Node source, Symbol sym)
391    {
392        NodeSet closureSet = createNodeSet();
393
394        for (Iterator transI = getTransitions(source).iterator(); transI.hasNext(); ) {
395            Transition currTransition = (Transition) transI.next();
396
397            try {
398                if (currTransition.sym == sym)
399                    closureSet.addNode(currTransition.dest);
400            }
401            catch (AutomatonException ae) {
402                throw new AssertionError(ae);
403            }
404        }
405
406        return closureSet;
407    }    
408
409    /**
410     * get Symbols associated with transitions from the specified source.
411     */
412    Set getSymbols(Node source)
413    {
414        Set symbolSet = new HashSet();
415
416        for (Iterator transI = getTransitions(source).iterator(); transI.hasNext(); ) {
417            Transition currTransition = (Transition) transI.next();
418            symbolSet.add(currTransition.sym);
419        }
420
421        return symbolSet;
422    }
423
424    /**
425     * retrieve Set of all transitions in instance.
426     */
427     public Set getTransitions()
428    {
429        return Collections.unmodifiableSet(transitions);
430    }
431
432    /**
433     * retrieve Set of all transitions from a specified Node.
434     */
435    private Set getTransitions(Node source)
436    {
437        Set transitionSet;
438
439        if ((transitionSet = (Set) transitionMap.get(source)) == null) {
440
441            transitionSet = new HashSet();
442    
443            for (Iterator transI = transitions.iterator(); transI.hasNext(); ) {
444                Transition currTransition = (Transition) transI.next();
445                if (currTransition.source == source)
446                    transitionSet.add(currTransition);
447            }
448
449            transitionMap.put(source, transitionSet);
450        }
451        
452        return transitionSet;
453    }
454
455    public NodeSet createNodeSet() { return new NodeSet(); }
456
457    /**
458     * dumps internal data of Nodes and Transitions that describe
459     * this FiniteAutomaton. It is not possible to dump it as a 
460     * regex as there are FA that cannot be expressed as a regex.
461     */
462    public String toString()
463    {
464        StringBuffer output = new StringBuffer();
465
466        // dump nodes
467        output.append("Nodes: ");
468        int count = 0;
469        for (Iterator nodeI = nodes.iterator(); nodeI.hasNext(); ) {
470            Node node = (Node) nodeI.next();
471            if (count++ != 0) output.append(", ");
472            output.append(node.getID());
473        }
474
475        output.append("\n");
476
477        // dump transitions
478       // try {
479            output.append("Transitions: source, dest, symbol\n");
480            for (Iterator tranI = transitions.iterator(); tranI.hasNext(); ) {
481                Transition trans = (Transition) tranI.next();
482                output.append(trans.toString());
483                output.append("\n");
484            }
485        /*}
486        catch (IllegalSymbolException ise) {
487            throw new AssertionError(ise);
488        }
489        catch (BioException be) {
490            throw new AssertionError(be);
491        }*/
492        return output.toString();
493    }
494
495    protected int alphaIndex(Symbol sym)
496        throws IllegalSymbolException
497    {
498        if (sym == null) return 999;
499        else
500            return alfaIdx.indexForSymbol(sym);
501    }
502}
503