001/* 002 * BioJava development code 003 * 004 * This code may be freely distributed and modified under the 005 * terms of the GNU Lesser General Public Licence. This should 006 * be distributed with the code. If you do not have a copy, 007 * see: 008 * 009 * http://www.gnu.org/copyleft/lesser.html 010 * 011 * Copyright for this code is held jointly by the individual 012 * authors. These should be listed in @author doc comments. 013 * 014 * For more information on the BioJava project and its aims, 015 * or to join the biojava-l mailing list, visit the home page 016 * at: 017 * 018 * http://www.biojava.org/ 019 * 020 */ 021 022package org.biojava.utils.automata; 023import java.util.Collections; 024import java.util.HashMap; 025import java.util.HashSet; 026import java.util.Iterator; 027import java.util.Map; 028import java.util.Set; 029 030import org.biojava.bio.BioError; 031import org.biojava.bio.BioException; 032import org.biojava.bio.seq.io.SymbolTokenization; 033import org.biojava.bio.symbol.AlphabetIndex; 034import org.biojava.bio.symbol.AlphabetManager; 035import org.biojava.bio.symbol.FiniteAlphabet; 036import org.biojava.bio.symbol.IllegalSymbolException; 037import org.biojava.bio.symbol.Symbol; 038 039/** 040 * Class for modelling finite automata. 041 * <p> 042 * This class models basic FA behaviour. More specialised 043 * behaviour is implemented by subclassing this. 044 * 045 * @author David Huen 046 * @since 1.4 047 */ 048public class FiniteAutomaton 049{ 050 /** 051 * describes a Node in this finite automaton. 052 */ 053 class Node 054 { 055 private int nodeID; 056 057 /** 058 * Nodes are assigned nodeIDs sequentially. 059 * Terminal nodes are assigned negative 060 * Node IDs. 061 */ 062 protected Node(boolean terminal) 063 { 064 if (terminal) 065 nodeID = -(nodeCount++); 066 else 067 nodeID = nodeCount++; 068 } 069 070 protected Node(int nodeID) 071 { 072 this.nodeID = nodeID; 073 } 074 075 /** 076 * is this a terminal Node? 077 */ 078 boolean isTerminal() 079 { 080 return nodeID < 0; 081 } 082 083 /** 084 * returns the Node ID. 085 */ 086 int getID() { return nodeID; } 087 088 /** 089 * Two Nodes are equal if they share the same 090 * parent and have the same node ID. 091 */ 092 public boolean equals(Object o) 093 { 094 if (!(o instanceof Node)) 095 return false; 096 097 // check that outer class is the same! 098 Node other = (Node) o; 099 100 if (other.parent() != parent()) 101 return false; 102 103 if ((other.getID() != getID())) 104 return true; 105 else 106 return false; 107 } 108 109 public FiniteAutomaton parent() { return FiniteAutomaton.this; } 110 public int hashCode() { return nodeID; } 111 public String toString() { return "Node: " + nodeID; } 112 } 113 114 /** 115 * a class that represents a Set of Nodes. 116 */ 117 class NodeSet 118 extends HashSet 119 { 120 /** 121 * Adds a Node to this NodeSet. The Node and the NodeSet 122 * to which it is added must be from the same FiniteAutomaton 123 * instance. 124 */ 125 void addNode(Node node) 126 throws AutomatonException 127 { 128 if (!(nodes.contains(node))) 129 throw new AutomatonException("attempt to add foreign Node to this NodeSet."); 130 131 add(node); 132 } 133 134 void addNodeSet(NodeSet otherSet) 135 throws AutomatonException 136 { 137 // it will be assumed that the other NodeSet is legit 138 // if it comes from this FiniteAutomaton. 139 if (otherSet.parent() != FiniteAutomaton.this) 140 throw new AutomatonException("attempt to add foreign NodeSet to this NodeSet."); 141 142 addAll(otherSet); 143 } 144 145 /** 146 * Equality implies that both Objects are NodeSets, 147 * both from the same FiniteAutomaton instance and 148 * have the same Nodes within them. 149 */ 150 public boolean equals(Object o) 151 { 152 // only NodeSets can be compared. 153 if (!(o instanceof NodeSet)) 154 return false; 155 156 // Contents of NodeSets must be from same model. 157 NodeSet other = (NodeSet) o; 158 if (parent() != other.parent()) 159 return false; 160 161 // sets themselves must be equal 162 //System.out.println(toString() + " equals? " + other.toString()); 163 return super.equals(o); 164 } 165 166 /** 167 * The hashCode is currently the sum of Node hashCodes. 168 */ 169 public int hashCode() 170 { 171 int sum = 0; 172 173 for (Iterator setI = iterator(); setI.hasNext(); ) { 174 sum += setI.next().hashCode(); 175 } 176 177 return sum; 178 } 179 180 /** 181 * Returns the parental FiniteAutomaton that 182 * this NodeSet is a child of. 183 */ 184 FiniteAutomaton parent() 185 { 186 return FiniteAutomaton.this; 187 } 188 189 public String toString() 190 { 191 StringBuffer output = new StringBuffer(); 192 193 // dump nodes 194 output.append("["); 195 int count = 0; 196 for (Iterator nodeI = iterator(); nodeI.hasNext(); ) { 197 Node node = (Node) nodeI.next(); 198 if (count++ != 0) output.append(", "); 199 output.append(node.getID()); 200 } 201 output.append("]"); 202 203 return output.toString(); 204 } 205 } 206 207 /** 208 * Models a Transition within the FiniteAutomaton. 209 */ 210 class Transition 211 { 212 protected Node source; 213 protected Node dest; 214 protected Symbol sym; 215 216 /** 217 * @param source The source Node of the transition. 218 * @param dest The destination Node of the transition. 219 * @param sym The Symbol that invokes the transition. 220 */ 221 private Transition(Node source, Node dest, Symbol sym) 222 { 223 this.source = source; 224 this.dest = dest; 225 this.sym = sym; 226 } 227 228 Node getSource() { return source; } 229 Node getDest() { return dest; } 230 Symbol getSymbol() { return sym; } 231 232 /** 233 * Sets the source Node. 234 */ 235 void setSource(Node source) 236 throws AutomatonException 237 { 238 if (source.parent() != FiniteAutomaton.this) 239 throw new AutomatonException("Attempt to Node from one Model in another."); 240 241 this.source = source; 242 } 243 244 /** 245 * Sets the destination Node. 246 */ 247 void setDest(Node dest) 248 throws AutomatonException 249 { 250 if (dest.parent() != FiniteAutomaton.this) 251 throw new AutomatonException("Attempt to Node from one Model in another."); 252 253 this.dest = dest; 254 } 255 256 /** 257 * Two Transitions are equal if they are both 258 * defined on the same FiniteAutomaton instance 259 * and their source and destination Nodes and associated 260 * Symbols are equal. 261 */ 262 public boolean equals(Object o) 263 { 264 if (!(o instanceof Transition)) 265 return false; 266 267 // check that outer class is the same! 268 Transition other = (Transition) o; 269 270 if (other.parent() != parent()) 271 return false; 272 273 if ((source.equals(other.source)) && 274 (dest.equals(other.dest)) && 275 (sym == other.getSymbol())) 276 return true; 277 else 278 return false; 279 } 280 281 public FiniteAutomaton parent() { return FiniteAutomaton.this; } 282 283 public int hashCode() 284 { 285 try { 286 return (source.getID() << 20) + (dest.getID() << 10) 287 + alphaIndex(sym); 288 } 289 catch (IllegalSymbolException ise) { 290 throw new BioError("Fatal error: Unexpected IllegalSymbolException on computing indexForSymbol."); 291 } 292 } 293 294 public String toString() 295 { 296 StringBuffer output = new StringBuffer(); 297 try { 298 SymbolTokenization toke = alfa.getTokenization("token"); 299 output.append(source.getID()); 300 output.append("\t"); 301 output.append(dest.getID()); 302 output.append("\t"); 303 output.append((sym == null)?"EPSILON":toke.tokenizeSymbol(sym)); 304 return output.toString(); 305 } 306 catch (IllegalSymbolException ise) { 307 throw new AssertionError(ise); 308 } 309 catch (BioException be) { 310 throw new AssertionError(be); 311 } 312 } 313 } 314 315 private FiniteAlphabet alfa; 316 private AlphabetIndex alfaIdx; 317 private String name; 318 319 // private data storage 320 /** 321 * User Nodes are assigned Node IDs of 2 and larger. 322 */ 323 private int nodeCount = 2; 324 protected Set nodes = new HashSet(); 325 protected Set transitions = new HashSet(); 326 327 private Map transitionMap = new HashMap(); 328 329 private int START = 0; 330 private int END = -1; 331 protected Node start; 332 protected Node end; 333 334 FiniteAutomaton(String name, FiniteAlphabet alfa) 335 { 336 this.alfa = alfa; 337 this.name = name; 338 alfaIdx = AlphabetManager.getAlphabetIndex(alfa); 339 340 // ensure that the start and end are created 341 // end stored in the node structures. 342 nodes.add(start = new Node(START)); 343 nodes.add(end = new Node(END)); 344 } 345 346 FiniteAlphabet getAlphabet() { return alfa; } 347 String getName() { return name; } 348 public Node getStart() { return start; } 349 public Node getEnd() { return end; } 350 351 public FiniteAutomaton getAutomaton() 352 { 353 return this; 354 } 355 356 public Transition addTransition(Node start, Node end, Symbol sym) 357 { 358 Transition newTransition = new Transition(start, end, sym); 359 transitions.add(newTransition); 360 return newTransition; 361 } 362 363 /** 364 * Add a node to the FA. 365 * @param terminal Is the Node terminal? 366 */ 367 public Node addNode(boolean terminal) 368 { 369 Node node = new Node(terminal); 370 nodes.add(node); 371 return node; 372 } 373 374 int nodeCount() { return nodes.size(); } 375 376 /** 377 * get all Nodes within this instance. 378 */ 379 public NodeSet getNodes() 380 { 381 NodeSet nodeSet = createNodeSet(); 382 nodeSet.addAll(nodes); 383 return nodeSet; 384 } 385 386 /** 387 * What is the Set of Nodes reachable from source 388 * Node by transitions associated with Symbol. 389 */ 390 NodeSet getClosure(Node source, Symbol sym) 391 { 392 NodeSet closureSet = createNodeSet(); 393 394 for (Iterator transI = getTransitions(source).iterator(); transI.hasNext(); ) { 395 Transition currTransition = (Transition) transI.next(); 396 397 try { 398 if (currTransition.sym == sym) 399 closureSet.addNode(currTransition.dest); 400 } 401 catch (AutomatonException ae) { 402 throw new AssertionError(ae); 403 } 404 } 405 406 return closureSet; 407 } 408 409 /** 410 * get Symbols associated with transitions from the specified source. 411 */ 412 Set getSymbols(Node source) 413 { 414 Set symbolSet = new HashSet(); 415 416 for (Iterator transI = getTransitions(source).iterator(); transI.hasNext(); ) { 417 Transition currTransition = (Transition) transI.next(); 418 symbolSet.add(currTransition.sym); 419 } 420 421 return symbolSet; 422 } 423 424 /** 425 * retrieve Set of all transitions in instance. 426 */ 427 public Set getTransitions() 428 { 429 return Collections.unmodifiableSet(transitions); 430 } 431 432 /** 433 * retrieve Set of all transitions from a specified Node. 434 */ 435 private Set getTransitions(Node source) 436 { 437 Set transitionSet; 438 439 if ((transitionSet = (Set) transitionMap.get(source)) == null) { 440 441 transitionSet = new HashSet(); 442 443 for (Iterator transI = transitions.iterator(); transI.hasNext(); ) { 444 Transition currTransition = (Transition) transI.next(); 445 if (currTransition.source == source) 446 transitionSet.add(currTransition); 447 } 448 449 transitionMap.put(source, transitionSet); 450 } 451 452 return transitionSet; 453 } 454 455 public NodeSet createNodeSet() { return new NodeSet(); } 456 457 /** 458 * dumps internal data of Nodes and Transitions that describe 459 * this FiniteAutomaton. It is not possible to dump it as a 460 * regex as there are FA that cannot be expressed as a regex. 461 */ 462 public String toString() 463 { 464 StringBuffer output = new StringBuffer(); 465 466 // dump nodes 467 output.append("Nodes: "); 468 int count = 0; 469 for (Iterator nodeI = nodes.iterator(); nodeI.hasNext(); ) { 470 Node node = (Node) nodeI.next(); 471 if (count++ != 0) output.append(", "); 472 output.append(node.getID()); 473 } 474 475 output.append("\n"); 476 477 // dump transitions 478 // try { 479 output.append("Transitions: source, dest, symbol\n"); 480 for (Iterator tranI = transitions.iterator(); tranI.hasNext(); ) { 481 Transition trans = (Transition) tranI.next(); 482 output.append(trans.toString()); 483 output.append("\n"); 484 } 485 /*} 486 catch (IllegalSymbolException ise) { 487 throw new AssertionError(ise); 488 } 489 catch (BioException be) { 490 throw new AssertionError(be); 491 }*/ 492 return output.toString(); 493 } 494 495 protected int alphaIndex(Symbol sym) 496 throws IllegalSymbolException 497 { 498 if (sym == null) return 999; 499 else 500 return alfaIdx.indexForSymbol(sym); 501 } 502} 503