001
002package org.biojava.utils.automata;
003
004import java.util.HashMap;
005import java.util.HashSet;
006import java.util.Iterator;
007import java.util.Map;
008import java.util.Set;
009
010import org.biojava.bio.symbol.Symbol;
011
012public class DfaBuilder
013{
014    private class DfaNode
015    {
016        FiniteAutomaton.NodeSet closureSet;
017        FiniteAutomaton.Node node;
018
019        private DfaNode(FiniteAutomaton.NodeSet closureSet)
020        {
021            //System.out.println("in DfaNode constructor");
022            this.closureSet = closureSet;
023            //System.out.println("initialising this.closureSet");
024            node = dfa.addNode(isTerminal());
025            //System.out.println("DfaNode created.");
026        }
027
028        private DfaNode(FiniteAutomaton.Node node, FiniteAutomaton.NodeSet closureSet)
029        {
030            this.closureSet = closureSet;
031            this.node = node;
032        }
033
034        /**
035         * checks whether this closure set has a terminal
036         * Node in it.
037         */
038        private boolean isTerminal()
039        {
040            for (Iterator closI = closureSet.iterator(); closI.hasNext(); ) {
041                FiniteAutomaton.Node currNode = (FiniteAutomaton.Node) closI.next();
042                //System.out.println("isTerminal() checking: " + currNode);
043                if (currNode.isTerminal())
044                    return true;
045            }
046            return false;
047        }
048
049        public String toString()
050        {
051            StringBuffer output = new StringBuffer();
052
053            output.append(node.toString());
054            output.append("\n");
055            output.append(closureSet.toString());
056            output.append("\n");
057
058            return output.toString();
059        }
060    }
061
062    private Nfa nfa;
063    private FiniteAutomaton dfa;
064    private boolean converted = false;
065    private Map closureSets = new HashMap();
066
067    DfaBuilder(Nfa nfa)
068        throws AutomatonException
069    {
070        this.nfa = nfa;
071
072        dfa = new FiniteAutomaton("dfa_" + nfa.getName(), nfa.getAlphabet());
073
074        // initialise DFA and the closureSets Map with the initial mapping.
075        FiniteAutomaton.NodeSet initialSet = nfa.createNodeSet();
076        initialSet.addNode(nfa.getStart());
077        closureSets.put(initialSet, new DfaNode(dfa.getStart(), initialSet));
078    }
079
080    public FiniteAutomaton getDFA()
081        throws AutomatonException
082    {
083        //System.out.println("getDFA called.");
084        if (!converted) {
085            constructSubsets();
086            converted = true;
087        }
088        return dfa;
089    }
090
091    public void constructSubsets()
092        throws AutomatonException
093    {
094        //System.out.println("constructSubsets() called.");
095        // the initial NodeSet needs to contain the start state
096        // and the lambda closure of the start state.
097        FiniteAutomaton.NodeSet initialSet = nfa.createNodeSet();
098        initialSet.addNodeSet(nfa.getLambdaClosure(nfa.getStart()));
099        //System.out.println("adding initial node to dfa.");
100        initialSet.addNode(nfa.getStart());
101        
102        //System.out.println("starting constructSubsets(...)");
103        constructSubsets(getDfaNode(initialSet));
104    }
105
106    /**
107     * Given a DFA node representing a Set of NFA nodes,
108     * construct other DFA nodes that transit from it.
109     */
110    private void constructSubsets(DfaNode dfaNode)
111        throws AutomatonException
112    {
113        //System.out.println("constructSubsets:\n" + dfaNode.toString());
114        // retrieve the NFA nodes
115        FiniteAutomaton.NodeSet closureSet = dfaNode.closureSet;
116
117        // what Symbols have transitions from the source closure set?
118        Set symbolSet = new HashSet();
119        for (Iterator closI = closureSet.iterator(); closI.hasNext(); ) {
120            org.biojava.utils.automata.Nfa.Node node = (org.biojava.utils.automata.Nfa.Node) closI.next();
121
122            symbolSet.addAll(nfa.getSymbols(node));
123        }
124
125        // if there are lambda transitions from the source node set,
126        // record this fact then remove LAMBDA from the symbol set.
127        boolean isLambdaAffected = symbolSet.contains(Nfa.LAMBDA);
128        if (isLambdaAffected) symbolSet.remove(Nfa.LAMBDA);
129
130        // for each of the NFA nodes and each Symbol, compute
131        // the next closure Sets and their associated DFA node.
132        //System.out.println("constructSubsets going thru symbols for closure. " + symbolSet);
133        for (Iterator symI = symbolSet.iterator(); symI.hasNext(); ) {
134            Symbol currSymbol = (Symbol) symI.next();
135
136            FiniteAutomaton.NodeSet closureForSymbol = nfa.createNodeSet();
137
138            //System.out.println("constructSubsets going thru Nodes for Symbol.");
139            for (Iterator closI = closureSet.iterator(); closI.hasNext(); ) {
140                org.biojava.utils.automata.Nfa.Node node = (org.biojava.utils.automata.Nfa.Node) closI.next();
141
142                // add closure set for current symbol for this NFA node to closureSet
143                FiniteAutomaton.NodeSet currNodeSet = nfa.getClosure(node, currSymbol);
144                //System.out.println("closure set for " + currSymbol.getName() + " from NFA node " + node.getID() + " is " + currNodeSet.toString());
145                closureForSymbol.addNodeSet(currNodeSet);
146                // add lambda closure to closureSet too.
147                closureForSymbol.addNodeSet(nfa.getLambdaClosure(node));
148            }
149
150            if (!closureForSymbol.isEmpty()) {
151                // check whether this NodeSet has had a DfaNode assigned to it.
152                if (closureSets.containsKey(closureForSymbol)) {
153                    // the new Transition ends with a known DfaNode.
154
155                    // add transition from dfa Node to a existing Node for this closure set.
156                    DfaNode dfaDestNode = getDfaNode(closureForSymbol);
157                    dfa.addTransition(dfaNode.node, dfaDestNode.node, currSymbol);
158                }
159                else {
160                    // this is a novel closure set.
161
162                    // create a DfaNode for this closure set.
163                    DfaNode dfaDestNode = getDfaNode(closureForSymbol);
164
165                    // add transition from dfa Node to this new Node.
166                    dfa.addTransition(dfaNode.node, dfaDestNode.node, currSymbol);
167
168                    // call self recursively with the new Node.
169                    constructSubsets(dfaDestNode);
170                }
171            }
172        }
173    }
174
175    /**
176     * get the DFA Node associated with the closure Set of NFA nodes.
177     * If it does not exist, create it.
178     */
179    private DfaNode getDfaNode(FiniteAutomaton.NodeSet nfaNodes)
180    {
181        //System.out.println("getDfaNode called with " + nfaNodes.toString());
182        DfaNode dfaNode = (DfaNode) closureSets.get(nfaNodes);
183
184        //System.out.println("dfaNode is " + dfaNode);
185        if (dfaNode == null) {
186            //System.out.println("putting new DfaNode into closureSets.");
187            dfaNode = new DfaNode(nfaNodes);
188            closureSets.put(nfaNodes, dfaNode);
189        }
190
191        return dfaNode;
192    }
193}