001 002package org.biojava.utils.automata; 003 004import java.util.HashMap; 005import java.util.HashSet; 006import java.util.Iterator; 007import java.util.Map; 008import java.util.Set; 009 010import org.biojava.bio.symbol.Symbol; 011 012public class DfaBuilder 013{ 014 private class DfaNode 015 { 016 FiniteAutomaton.NodeSet closureSet; 017 FiniteAutomaton.Node node; 018 019 private DfaNode(FiniteAutomaton.NodeSet closureSet) 020 { 021 //System.out.println("in DfaNode constructor"); 022 this.closureSet = closureSet; 023 //System.out.println("initialising this.closureSet"); 024 node = dfa.addNode(isTerminal()); 025 //System.out.println("DfaNode created."); 026 } 027 028 private DfaNode(FiniteAutomaton.Node node, FiniteAutomaton.NodeSet closureSet) 029 { 030 this.closureSet = closureSet; 031 this.node = node; 032 } 033 034 /** 035 * checks whether this closure set has a terminal 036 * Node in it. 037 */ 038 private boolean isTerminal() 039 { 040 for (Iterator closI = closureSet.iterator(); closI.hasNext(); ) { 041 FiniteAutomaton.Node currNode = (FiniteAutomaton.Node) closI.next(); 042 //System.out.println("isTerminal() checking: " + currNode); 043 if (currNode.isTerminal()) 044 return true; 045 } 046 return false; 047 } 048 049 public String toString() 050 { 051 StringBuffer output = new StringBuffer(); 052 053 output.append(node.toString()); 054 output.append("\n"); 055 output.append(closureSet.toString()); 056 output.append("\n"); 057 058 return output.toString(); 059 } 060 } 061 062 private Nfa nfa; 063 private FiniteAutomaton dfa; 064 private boolean converted = false; 065 private Map closureSets = new HashMap(); 066 067 DfaBuilder(Nfa nfa) 068 throws AutomatonException 069 { 070 this.nfa = nfa; 071 072 dfa = new FiniteAutomaton("dfa_" + nfa.getName(), nfa.getAlphabet()); 073 074 // initialise DFA and the closureSets Map with the initial mapping. 075 FiniteAutomaton.NodeSet initialSet = nfa.createNodeSet(); 076 initialSet.addNode(nfa.getStart()); 077 closureSets.put(initialSet, new DfaNode(dfa.getStart(), initialSet)); 078 } 079 080 public FiniteAutomaton getDFA() 081 throws AutomatonException 082 { 083 //System.out.println("getDFA called."); 084 if (!converted) { 085 constructSubsets(); 086 converted = true; 087 } 088 return dfa; 089 } 090 091 public void constructSubsets() 092 throws AutomatonException 093 { 094 //System.out.println("constructSubsets() called."); 095 // the initial NodeSet needs to contain the start state 096 // and the lambda closure of the start state. 097 FiniteAutomaton.NodeSet initialSet = nfa.createNodeSet(); 098 initialSet.addNodeSet(nfa.getLambdaClosure(nfa.getStart())); 099 //System.out.println("adding initial node to dfa."); 100 initialSet.addNode(nfa.getStart()); 101 102 //System.out.println("starting constructSubsets(...)"); 103 constructSubsets(getDfaNode(initialSet)); 104 } 105 106 /** 107 * Given a DFA node representing a Set of NFA nodes, 108 * construct other DFA nodes that transit from it. 109 */ 110 private void constructSubsets(DfaNode dfaNode) 111 throws AutomatonException 112 { 113 //System.out.println("constructSubsets:\n" + dfaNode.toString()); 114 // retrieve the NFA nodes 115 FiniteAutomaton.NodeSet closureSet = dfaNode.closureSet; 116 117 // what Symbols have transitions from the source closure set? 118 Set symbolSet = new HashSet(); 119 for (Iterator closI = closureSet.iterator(); closI.hasNext(); ) { 120 org.biojava.utils.automata.Nfa.Node node = (org.biojava.utils.automata.Nfa.Node) closI.next(); 121 122 symbolSet.addAll(nfa.getSymbols(node)); 123 } 124 125 // if there are lambda transitions from the source node set, 126 // record this fact then remove LAMBDA from the symbol set. 127 boolean isLambdaAffected = symbolSet.contains(Nfa.LAMBDA); 128 if (isLambdaAffected) symbolSet.remove(Nfa.LAMBDA); 129 130 // for each of the NFA nodes and each Symbol, compute 131 // the next closure Sets and their associated DFA node. 132 //System.out.println("constructSubsets going thru symbols for closure. " + symbolSet); 133 for (Iterator symI = symbolSet.iterator(); symI.hasNext(); ) { 134 Symbol currSymbol = (Symbol) symI.next(); 135 136 FiniteAutomaton.NodeSet closureForSymbol = nfa.createNodeSet(); 137 138 //System.out.println("constructSubsets going thru Nodes for Symbol."); 139 for (Iterator closI = closureSet.iterator(); closI.hasNext(); ) { 140 org.biojava.utils.automata.Nfa.Node node = (org.biojava.utils.automata.Nfa.Node) closI.next(); 141 142 // add closure set for current symbol for this NFA node to closureSet 143 FiniteAutomaton.NodeSet currNodeSet = nfa.getClosure(node, currSymbol); 144 //System.out.println("closure set for " + currSymbol.getName() + " from NFA node " + node.getID() + " is " + currNodeSet.toString()); 145 closureForSymbol.addNodeSet(currNodeSet); 146 // add lambda closure to closureSet too. 147 closureForSymbol.addNodeSet(nfa.getLambdaClosure(node)); 148 } 149 150 if (!closureForSymbol.isEmpty()) { 151 // check whether this NodeSet has had a DfaNode assigned to it. 152 if (closureSets.containsKey(closureForSymbol)) { 153 // the new Transition ends with a known DfaNode. 154 155 // add transition from dfa Node to a existing Node for this closure set. 156 DfaNode dfaDestNode = getDfaNode(closureForSymbol); 157 dfa.addTransition(dfaNode.node, dfaDestNode.node, currSymbol); 158 } 159 else { 160 // this is a novel closure set. 161 162 // create a DfaNode for this closure set. 163 DfaNode dfaDestNode = getDfaNode(closureForSymbol); 164 165 // add transition from dfa Node to this new Node. 166 dfa.addTransition(dfaNode.node, dfaDestNode.node, currSymbol); 167 168 // call self recursively with the new Node. 169 constructSubsets(dfaDestNode); 170 } 171 } 172 } 173 } 174 175 /** 176 * get the DFA Node associated with the closure Set of NFA nodes. 177 * If it does not exist, create it. 178 */ 179 private DfaNode getDfaNode(FiniteAutomaton.NodeSet nfaNodes) 180 { 181 //System.out.println("getDfaNode called with " + nfaNodes.toString()); 182 DfaNode dfaNode = (DfaNode) closureSets.get(nfaNodes); 183 184 //System.out.println("dfaNode is " + dfaNode); 185 if (dfaNode == null) { 186 //System.out.println("putting new DfaNode into closureSets."); 187 dfaNode = new DfaNode(nfaNodes); 188 closureSets.put(nfaNodes, dfaNode); 189 } 190 191 return dfaNode; 192 } 193}